C++如何获取一个网页的源代码
假如想获取 http://www.baidu.com/s?cl=3&wd=天安门 这个网页的源代码,该如何实现?
菜鸟一个,求代码。
[解决办法]
WebBrowser control
[解决办法]
我想用C++写一个程序,可以输入关键字,然后按关键字找好多邮箱保存起来。
[解决办法]
网上刚抄的:
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
#include <curl/curl.h>
#include <pthread.h>
/* Number of downloader threads that main() starts. */
const int MAXP=10;
/* Serialises the scanf() calls the worker threads make on the shared stdin. */
pthread_mutex_t mutex;
/*
 * libcurl write callback: appends the received chunk to a stdio stream.
 *
 * ptr    - start of the received data
 * size   - size of one item in bytes
 * nmemb  - number of items
 * stream - the FILE* registered through CURLOPT_WRITEDATA
 *
 * Returns the number of BYTES written. libcurl compares this with
 * size*nmemb and aborts the transfer when they differ, so the item count
 * returned by fwrite() is scaled by size and kept as size_t instead of being
 * squeezed through an int.
 */
size_t write_data(void *ptr, size_t size, size_t nmemb, void *stream){
    size_t items = fwrite(ptr, size, nmemb, (FILE *)stream);
    return items * size;
}
void* geturl(void* url){
CURL* curl=curl_easy_init();
curl_easy_setopt(curl,CURLOPT_URL,url);
curl_easy_setopt(curl,CURLOPT_FOLLOWLOCATION,1);
curl_easy_setopt(curl,CURLOPT_WRITEFUNCTION,write_data);
char* s=(char*)malloc(strlen(url)+1);
memset(s,0,strlen(url)+1);
memcpy(s,(char*)url,strlen(url));
int i=0;
for(;i<strlen(s);++i)
if(!((s[i]>='0'&&s[i]<='9')
[解决办法]
(s[i]>='a'&&s[i]<='z')
[解决办法]
(s[i]>='A'&&s[i]<='Z')))
s[i]='0';
FILE *file=fopen(s,"w");
if(!file){
printf("file create error\n");
curl_easy_cleanup(curl);
return ;
}
curl_easy_setopt(curl,CURLOPT_WRITEDATA,file);
curl_easy_perform(curl);
fclose(file);
curl_easy_cleanup(curl);
}
/*
 * Worker thread body: repeatedly takes the next whitespace-separated token
 * from stdin and downloads it with geturl(), until stdin is exhausted.
 *
 * arg - unused.
 * Returns NULL when no further token can be read.
 */
void* handle(void* arg){
    char name[200];
    (void)arg;
    while(1){
        memset(name,0,sizeof name);
        /* stdin is shared by all workers, so only one may read at a time. */
        pthread_mutex_lock(&mutex);
        /* %199s caps the token at sizeof(name)-1 characters so a long input
           line cannot overflow the buffer; a longer token is split. */
        if(scanf("%199s\n",name)!=1){
            pthread_mutex_unlock(&mutex);
            return NULL;
        }
        pthread_mutex_unlock(&mutex);
        /* The download itself runs outside the lock. */
        geturl((void*)name);
    }
}
/*
 * Reads URLs from in.txt (redirected onto stdin) and downloads them with up
 * to MAXP worker threads.
 *
 * Returns 0 on success, 1 when in.txt cannot be opened or libcurl cannot be
 * initialised.
 */
int main(int argc,char **argv){
    pthread_t p[MAXP];
    int started=0;
    int i;
    (void)argc;
    (void)argv;
    if(!freopen("in.txt","r",stdin)){
        perror("in.txt");
        return 1;
    }
    if(curl_global_init(CURL_GLOBAL_ALL)!=0){
        printf("curl init error\n");
        return 1;
    }
    pthread_mutex_init(&mutex,NULL);
    for(i=0;i<MAXP;++i){
        /* Only successfully created threads are counted, so the join loop
           never touches an uninitialised pthread_t. */
        if(pthread_create(&p[started],NULL,handle,NULL)!=0){
            printf("thread create error\n");
            break;
        }
        ++started;
    }
    for(i=0;i<started;++i)
        pthread_join(p[i],NULL);
    pthread_mutex_destroy(&mutex);
    curl_global_cleanup();
    return 0;
}
// Wraps one argument so that the command interpreter started by system()
// passes it to the program as a single literal word.
static std::string QuoteShellArg(const std::string& arg)
{
    std::string quoted;
#ifdef _WIN32
    // cmd.exe: double quotes keep '&', '|', '<' and '>' literal. A raw '"'
    // is sent in its percent-encoded form instead.
    // NOTE(review): %VAR% is still expanded by cmd.exe inside quotes.
    quoted += '"';
    for (std::string::size_type i = 0; i < arg.size(); ++i)
    {
        if (arg[i] == '"')
            quoted += "%22";
        else
            quoted += arg[i];
    }
    quoted += '"';
#else
    // POSIX sh: everything between single quotes is literal; a single quote
    // itself is spliced in as '\''.
    quoted += '\'';
    for (std::string::size_type i = 0; i < arg.size(); ++i)
    {
        if (arg[i] == '\'')
            quoted += "'\\''";
        else
            quoted += arg[i];
    }
    quoted += '\'';
#endif
    return quoted;
}

// Fetches a web page by running the external wget program.
//
// url: address of the page. The text after its last '/' is taken as the name
//      of the file wget creates in the current directory.
// Returns the page content with the lines joined together (line breaks are
// dropped), or "" when the URL has no file-name part or the downloaded file
// cannot be opened. The downloaded file is deleted afterwards.
std::string GetHtmlByWget(std::string url)
{
    // Name of the file the page is expected to be saved under.
    const std::string::size_type slash = url.find_last_of("/");
    const std::string fileName =
        (slash == std::string::npos) ? url : url.substr(slash + 1);
    if (fileName.empty())
    {
        return "";
    }

    // -q suppresses wget's progress output. The URL is quoted because query
    // strings routinely contain '&', which the shell would otherwise treat as
    // a command separator.
    std::string strCom = "wget -q ";
    strCom.append(QuoteShellArg(url));
    system(strCom.c_str());

    std::ifstream fin(fileName.c_str());
    if (!fin)
    {
        return "";
    }

    // Read the file line by line. std::getline has no length limit, so a
    // very long line no longer stops the loop early.
    std::string strHtml;
    std::string line;
    while (std::getline(fin, line))
    {
        strHtml.append(line);
    }
    fin.close();

    // Delete the downloaded file directly instead of through a shell command.
    ::remove(fileName.c_str());
    return strHtml;
}
class htmlAccessManager : public QObject
{
Q_OBJECT
public:
explicit htmlAccessManager(QObject *parent = 0);
htmlAccessManager(htmlAccessManager const&) = delete;
htmlAccessManager& operator=(htmlAccessManager const&) = delete;
public:
void request_html(QUrl const &url);
public slots:
void reply_finished(QNetworkReply *reply);
private:
QNetworkAccessManager *manager_;
};
// Creates the network access manager as a child of this object and routes its
// finished(QNetworkReply*) signal to reply_finished().
//
// parent: forwarded to QObject so this object joins the caller's ownership
//         tree (it was previously dropped and 0 passed instead).
htmlAccessManager::htmlAccessManager(QObject *parent) : QObject(parent),
    manager_(new QNetworkAccessManager(this))
{
    connect(manager_, SIGNAL(finished(QNetworkReply*)),
            this, SLOT(reply_finished(QNetworkReply*)));
}
// Starts a GET request for url; the result is delivered to reply_finished()
// through the manager's finished signal.
void htmlAccessManager::request_html(QUrl const &url)
{
    // get() takes a QNetworkRequest, and its constructor from QUrl is
    // explicit, so the request has to be built by hand.
    manager_->get(QNetworkRequest(url));
}
// Slot for a finished reply: logs the whole body and schedules the reply for
// deletion.
void htmlAccessManager::reply_finished(QNetworkReply *reply)
{
    // #1
    QString const result = reply->readAll();
    qDebug() << result; // the complete page content
    // The author was unsure whether calling deleteLater() up at #1 would be
    // safe. Managing the reply with RAII is the more robust choice, so that
    // an unexpected early exit from reply_finished cannot skip deleteLater().
    // NOTE(review): the QObject::deleteLater documentation says deletion is
    // deferred until control returns to the event loop, so calling it at #1
    // should be safe - confirm for the Qt version in use.
    reply->deleteLater();
}
// Deleter for smart pointers that hold objects which must be released through
// deleteLater() instead of delete.
class deleteLaterDeletor
{
public:
    // Calls data->deleteLater() when data is non-null; does nothing for a
    // null pointer. The pointer is received by value, so resetting it here
    // would have no effect on the caller.
    template<typename T>
    void operator()(T *data) const
    {
        if (data)
            data->deleteLater();
    }
};
typdef std::unique_ptr<QNetworkReply, deleteLaterDeletor> ReplyGuard;
// Slot for a finished reply: logs the whole body. The reply is handed to a
// ReplyGuard first, so deleteLater() is called on it on every exit path.
void htmlAccessManager::reply_finished(QNetworkReply *reply)
{
    ReplyGuard owned(reply);

    QString const body = owned->readAll();
    qDebug() << body; // the complete page content
}