抓取网页,过滤得到全部URL地址,怎么写?
抓取网页,过滤得到全部URL地址,怎么写?
[解决办法]
Request.Url
[解决办法]
使用 JavaScript 的 XMLHttpRequest 抓取目标网页,
然后对返回的字符串分析 <a> 标签,就可以得到大多数 URL。
[解决办法]
正则表达式
[解决办法]
/// <summary>
/// Crawl test: downloads the page whose address is in <c>txtTargeturl</c>,
/// extracts its links via <see cref="getLinks"/> and binds them to
/// <c>repeaterLinks</c>.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
protected void btnCrawltest_Click(object sender, EventArgs e)
{
    btnCrawltest.Enabled = false;
    try
    {
        HttpWebRequest request = (HttpWebRequest)WebRequest.Create(txtTargeturl.Text);

        // Dispose the response and reader even when reading or binding throws,
        // so the underlying connection is always released.
        using (HttpWebResponse response = (HttpWebResponse)request.GetResponse())
        {
            // NOTE(review): myHelper.getEncode presumably returns the page's
            // charset name for the given URL - confirm against the helper.
            Encoding pageEncoding = Encoding.GetEncoding(myHelper.getEncode(txtTargeturl.Text));

            using (StreamReader reader = new StreamReader(response.GetResponseStream(), pageEncoding))
            {
                // Bind the extracted link list to the repeater.
                repeaterLinks.DataSource = getLinks(txtKeyurl.Text, reader.ReadToEnd(), txtTargeturl.Text);
                repeaterLinks.DataBind();
            }
        }
    }
    finally
    {
        // Re-enable the button on failure too; otherwise it would stay disabled.
        btnCrawltest.Enabled = true;
    }

    Utility.Msg.Show(this, "采集测试完成!");
}

/// <summary>
/// Gets the list of links contained in a page.
/// </summary>
/// <param name="keyurl">Link marker.</param>
/// <param name="inStr">Page source to scan.</param>
/// <param name="targeturl">Target page address.</param>
/// <returns>The link list produced by <c>myHelper.getResult</c>.</returns>
private DataSet getLinks(string keyurl, string inStr, string targeturl)
{
    // Captures the href value as "url" and the anchor's inner content as "title".
    // The remainder of the opening tag is matched with [^>]* (zero or more) so
    // that an href which is the last attribute, e.g. <a href=abc>, is captured
    // in full instead of losing its final character to backtracking.
    string linkPattern = @"(?:<a[\s\S]*?href=['""]?(?<url>[^'""> ]+)['""]?[^>]*>(?<title>[\s\S]*?)</a>)";

    // NOTE(review): the meaning of the two trailing empty-string arguments is
    // defined by myHelper.getResult, which is not visible here.
    return myHelper.getResult(inStr, linkPattern, keyurl, targeturl, "", "");
}

/// <summary>
/// Click handler that writes a fixed string into <c>lbHtml</c>.
/// </summary>
/// <param name="sender">The control that raised the click event.</param>
/// <param name="e">Event data (unused).</param>
protected void Button1_Click(object sender, EventArgs e)
{
    lbHtml.Text = "1223345455";
}