模拟浏览器浏览网页的问题
在网上看到了这个方法,
public String GetPageCode(String PageURL, String Charset)
{
try
{
String strHtml = "";
HttpWebRequest wreq = (HttpWebRequest)WebRequest.Create(PageURL);
CookieContainer cookieCon = new CookieContainer();
wreq.CookieContainer = cookieCon;
HttpWebResponse wresp = (HttpWebResponse)wreq.GetResponse();
Stream s = wresp.GetResponseStream();
StreamReader objReader = new StreamReader(s, System.Text.Encoding.GetEncoding(Charset));
string strLine = "";
while (strLine != null)
{
strLine = objReader.ReadLine();
if (strLine != null)
{
strHtml += strLine.Trim();
}
}
strHtml = strHtml.Replace("<br />", "\r\n");
return strHtml;
}
catch (Exception n) //遇到错误,打印错误
{
return n.Message;
}
}
然后就执行了下,但是都是得到乱码,能帮忙看下吗?(换了其他的编码也是一样)
protected void Button2_Click(object sender, EventArgs e)
{
string sourcecode = GetPageCode("http://www.google.com.hk/search?q=NBA&hl=zh-CN&newwindow=1&safe=strict&prmd=ivns&ei=3cBGUJW-O--fiAeUqoHYAg&start=20&sa=N", "utf-8");
TextBox2.Text = sourcecode;
}
[解决办法]
本帖最后由 net_lover 于 2012-09-07 10:36:17 编辑 既然是模拟,你 的浏览器信息头都没有发送啊
解决 HttpWebRequest 时“重定向次数过多”的问题
当采用下面的方法抓取 google 网站内容时,会报告“重定向次数过多”或者“Too many automatic redirections were attempted”的错误:
C# 代码
String url="http://www.google.com.hk/search?hl=zh-CN&q=孟宪会";
System.Net.HttpWebRequest r = (System.Net.HttpWebRequest)System.Net.HttpWebRequest.Create(url);
r.AllowAutoRedirect = true;
//System.Net.CookieContainer c = new System.Net.CookieContainer();
//r.CookieContainer = c;
System.Net.HttpWebResponse res = r.GetResponse() as System.Net.HttpWebResponse;
System.IO.StreamReader s = new System.IO.StreamReader(res.GetResponseStream(),System.Text.Encoding.GetEncoding("GB2312"));
Response.Write(s.ReadToEnd());
res.Close();
String url="http://www.google.com.hk/search?hl=zh-CN&q=孟宪会";
System.Net.HttpWebRequest r = (System.Net.HttpWebRequest)System.Net.HttpWebRequest.Create(url);
r.AllowAutoRedirect = true;
System.Net.CookieContainer c = new System.Net.CookieContainer();
r.CookieContainer = c;
System.Net.HttpWebResponse res = r.GetResponse() as System.Net.HttpWebResponse;
System.IO.StreamReader s = new System.IO.StreamReader(res.GetResponseStream(),System.Text.Encoding.GetEncoding("GB2312"));
Response.Write(s.ReadToEnd());
res.Close();