请教:如何用C#获取网页显示的内容? (不是获取html代码!)
我想用C#获取某个网页上显示的内容(注:不是获取html代码)
我已经用vbs实现了这样的功能,源码如下:
' Loads a page in a hidden Internet Explorer instance and writes the rendered
' text (InnerText) of every <table> element to Kaijiang_xj.txt, one table per
' WriteLine call. Must be run under wscript/cscript (uses WScript.Sleep).
Const ForWriting = 2
Const READYSTATE_COMPLETE = 4

Set objFSO = CreateObject("Scripting.FileSystemObject")
' Third argument True: create the file if it does not exist yet.
Set objTextFile = objFSO.OpenTextFile("Kaijiang_xj.txt", ForWriting, True)

Set objIE = CreateObject("InternetExplorer.Application")
' Uncomment to watch the browser window while the script runs.
'objIE.visible = True
objIE.Navigate "http://www.xjflcp.com/ssc/"

' Wait for the page to finish loading. Sleeping between polls keeps the
' script from spinning a CPU core at 100% while IE is working.
Do While objIE.Busy Or objIE.ReadyState <> READYSTATE_COMPLETE
    WScript.Sleep 100
Loop

Set objDoc = objIE.Document
Set aTables = objDoc.getElementsByTagName("Table")
For Each objTable In aTables
    objTextFile.WriteLine objTable.Innertext
Next

objTextFile.Close
objIE.Quit
另存为*.vbs,执行即可。
请问,怎么用C#实现这样的功能?
[解决办法]
// Load the page in a WebBrowser control so the rendered DOM (not just the
// raw HTML) is available once loading completes.
WebBrowser web = new WebBrowser();
// Subscribe before navigating so the completion event cannot be missed.
web.DocumentCompleted += new WebBrowserDocumentCompletedEventHandler(web_DocumentCompleted);
web.Navigate("http://www.xjflcp.com/ssc/");

/// <summary>
/// Appends the rendered text of every table element in the loaded document
/// to Kaijiang_xj.txt, one table per line.
/// </summary>
/// <param name="sender">The WebBrowser control that raised the event.</param>
/// <param name="e">Event data for the completed navigation (not used).</param>
void web_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    WebBrowser browser = (WebBrowser)sender;
    if (browser.Document == null)
    {
        return;
    }

    HtmlElementCollection tables = browser.Document.GetElementsByTagName("Table");
    foreach (HtmlElement table in tables)
    {
        // InnerText is null for elements without text; skip those.
        if (table.InnerText == null)
        {
            continue;
        }

        // Terminate each table's text with a newline so consecutive tables
        // do not run together in the output file.
        File.AppendAllText("Kaijiang_xj.txt", table.InnerText + System.Environment.NewLine);
    }
}
[解决办法]
// First fetch the page HTML.
StringBuilder sb = new StringBuilder();
WebRequest myReq = WebRequest.Create("网页URL");
// The using blocks release the response, stream and reader even when the
// download throws part-way through.
using (WebResponse myRes = myReq.GetResponse())
using (Stream resStream = myRes.GetResponseStream())
using (StreamReader sr = new StreamReader(resStream, Encoding.Default))
{
    string r;
    // Lines are concatenated without separators, so the regex below runs
    // against the HTML with all line breaks removed.
    while ((r = sr.ReadLine()) != null)
    {
        sb.Append(r);
    }
}

// Then pull the wanted value out with a regular expression.
string regexStr = @"正则表达式";
Match mc = Regex.Match(sb.ToString(), regexStr, RegexOptions.IgnoreCase);
// Groups[1].Value is an empty string when the pattern does not match.
this.Label1.Text = mc.Groups[1].Value;
[解决办法]
// Download the raw response bytes and decode them as UTF-8.
// Note: this yields the HTML source, not the text the browser renders.
string html;
// WebClient is IDisposable; dispose it as soon as the download is done.
using (WebClient web = new WebClient())
{
    byte[] buffer = web.DownloadData("http://www.xjflcp.com/ssc/");
    html = System.Text.Encoding.UTF8.GetString(buffer);
}
[解决办法]
/// <summary>
/// Downloads the HTML source of a page.
/// </summary>
/// <param name="url">Address of the page to fetch.</param>
/// <returns>
/// The response body decoded with <see cref="System.Text.Encoding.Default"/>,
/// or the literal string "no values" if the request fails for any reason.
/// </returns>
public string GetPageContent(string url)
{
    try
    {
        HttpWebRequest rt = (HttpWebRequest)WebRequest.Create(url);
        // The using blocks dispose reader, stream and response in reverse
        // order on both the success and the failure path.
        using (HttpWebResponse rs = (HttpWebResponse)rt.GetResponse())
        using (Stream stream = rs.GetResponseStream())
        using (StreamReader sr = new StreamReader(stream, System.Text.Encoding.Default))
        {
            return sr.ReadToEnd();
        }
    }
    catch (Exception)
    {
        // Callers rely on this sentinel instead of an exception.
        return "no values";
    }
}