// Commonly used utility class: helpers for collecting (scraping) articles from web pages.
using System;
using System.Collections;
using System.Collections.Generic;
using System.Net;
using System.Text;
using System.Text.RegularExpressions;

namespace zjf.Web.UI
{
    /// <summary>
    /// Static helpers for downloading a page, cutting fragments out of its HTML
    /// by start/end markers, and turning scraped links into absolute URLs.
    /// </summary>
    public class CollectionHelper
    {
        /// <summary>
        /// Removes every tab, line feed and carriage return from the text.
        /// </summary>
        /// <param name="html">Text to clean; may be null.</param>
        /// <returns>The text without "\t", "\n" and "\r"; an empty string for null input.</returns>
        public static string ClearWrap(string html)
        {
            if (string.IsNullOrEmpty(html))
            {
                return string.Empty;
            }

            return html.Replace("\t", "").Replace("\n", "").Replace("\r", "");
        }

        /// <summary>
        /// Downloads a page as text and strips tabs and line breaks from it.
        /// </summary>
        /// <param name="url">Address of the page to download.</param>
        /// <param name="encode">
        /// Encoding selector: 0 = Encoding.Default, 1 = GB2312, 2 = UTF-8, 3 = Unicode (UTF-16).
        /// Any other value leaves the WebClient's own default encoding in place.
        /// </param>
        /// <returns>The cleaned page text, or an empty string when anything goes wrong.</returns>
        public static string GetWebPage(string url, int encode)
        {
            try
            {
                // WebClient is IDisposable; release it as soon as the download finishes.
                using (WebClient client = new WebClient())
                {
                    client.Credentials = CredentialCache.DefaultCredentials;

                    switch (encode)
                    {
                        case 0:
                            client.Encoding = Encoding.Default;
                            break;
                        case 1:
                            client.Encoding = Encoding.GetEncoding("GB2312");
                            break;
                        case 2:
                            client.Encoding = Encoding.UTF8;
                            break;
                        case 3:
                            client.Encoding = Encoding.Unicode;
                            break;
                    }

                    return ClearWrap(client.DownloadString(url));
                }
            }
            catch (Exception)
            {
                // Deliberately best-effort: callers receive an empty page instead of an exception.
                return "";
            }
        }

        /// <summary>
        /// Returns the text between the first occurrence of <paramref name="start"/> and the
        /// first occurrence of <paramref name="end"/> that follows it.
        /// </summary>
        /// <param name="html">Text to search.</param>
        /// <param name="start">Marker that precedes the wanted fragment.</param>
        /// <param name="end">Marker that follows the wanted fragment.</param>
        /// <returns>
        /// The fragment between the markers (markers excluded); "$StartError$" when the start
        /// marker is missing; "$EndError$" when no end marker follows the start marker.
        /// </returns>
        public static string GetTagBody(string html, string start, string end)
        {
            if (html == null || start == null)
            {
                return "$StartError$";
            }

            if (end == null)
            {
                return "$EndError$";
            }

            int startPos = html.IndexOf(start, StringComparison.Ordinal);
            if (startPos < 0)
            {
                return "$StartError$";
            }

            // Look for the end marker only after the start marker. Searching from the
            // beginning of the text could find an earlier occurrence and produce a
            // negative Substring length.
            int bodyPos = startPos + start.Length;
            int endPos = html.IndexOf(end, bodyPos, StringComparison.Ordinal);
            if (endPos < 0)
            {
                return "$EndError$";
            }

            return html.Substring(bodyPos, endPos - bodyPos);
        }

        /// <summary>
        /// Extracts every link found between the given markers and converts each one to an
        /// absolute URL with <see cref="GetHttpUrl"/>.
        /// </summary>
        /// <param name="html">Text to search.</param>
        /// <param name="start">Regular-expression fragment matching the text before a link.</param>
        /// <param name="end">Regular-expression fragment matching the text after a link.</param>
        /// <param name="listUrl">URL of the page the links were found on.</param>
        /// <returns>The resolved links in match order, or null when nothing matches.</returns>
        public static string[] GetLinkArray(string html, string start, string end, string listUrl)
        {
            // The markers are inserted into the pattern verbatim, so they are interpreted
            // as regular-expression syntax rather than literal text.
            Regex linkPattern = new Regex(start + "(.+?)" + end, RegexOptions.IgnoreCase);
            MatchCollection matches = linkPattern.Matches(html);

            // Existing contract: "no links" is reported as null, not as an empty array.
            if (matches.Count == 0)
            {
                return null;
            }

            string[] links = new string[matches.Count];
            for (int i = 0; i < matches.Count; i++)
            {
                links[i] = GetHttpUrl(listUrl, matches[i].Groups[1].Value);
            }

            return links;
        }

        /// <summary>
        /// Converts a link scraped from a page into an absolute URL.
        /// </summary>
        /// <param name="listUrl">Absolute URL of the page the link was found on.</param>
        /// <param name="link">The link as written in the page; absolute or relative.</param>
        /// <returns>
        /// The link unchanged when it is null, empty, or already starts with "http://" or
        /// "https://"; otherwise the link resolved against <paramref name="listUrl"/>. If
        /// <paramref name="listUrl"/> is not a valid absolute URI the link is returned unchanged.
        /// </returns>
        public static string GetHttpUrl(string listUrl, string link)
        {
            if (string.IsNullOrEmpty(link))
            {
                return link;
            }

            if (link.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
                || link.StartsWith("https://", StringComparison.OrdinalIgnoreCase))
            {
                return link;
            }

            Uri baseUri;
            if (!Uri.TryCreate(listUrl, UriKind.Absolute, out baseUri))
            {
                return link;
            }

            // Root-relative link: scheme and authority of the list page plus the link verbatim.
            if (link.StartsWith("/", StringComparison.Ordinal)
                && !link.StartsWith("//", StringComparison.Ordinal))
            {
                return baseUri.GetLeftPart(UriPartial.Authority) + link;
            }

            // Everything else ("../x", "x.html", "//host/x", ...) goes through standard
            // URI reference resolution.
            Uri resolved;
            if (Uri.TryCreate(baseUri, link, out resolved))
            {
                return resolved.AbsoluteUri;
            }

            return link;
        }

        /// <summary>
        /// Downloads an article page and extracts its title and body using the markers
        /// configured on the collection item.
        /// </summary>
        /// <param name="url">Address of the article page.</param>
        /// <param name="col">
        /// Collection item supplying SiteEncode and the Article/Title/Body start and end markers.
        /// </param>
        /// <param name="test">When true, the result is only returned and Add() is not called.</param>
        /// <returns>The populated history record.</returns>
        public static CollectionHistory GetArticle(string url, Collection col, bool test)
        {
            string html = GetWebPage(url, col.SiteEncode);

            // Narrow the page to the article region first; title and body are cut from that region.
            html = GetTagBody(html, col.ArticleStart, col.ArticleEnd);

            CollectionHistory art = new CollectionHistory();
            art.Title = GetTagBody(html, col.TitleStart, col.TitleEnd);
            art.Body = GetTagBody(html, col.BodyStart, col.BodyEnd);
            art.PostDate = DateTime.Now;
            art.Author = "测试";
            art.CollectionDate = DateTime.Now;

            // NOTE(review): when the download or a marker lookup fails, Title/Body hold the
            // "$StartError$"/"$EndError$" sentinels and Add() is still called - confirm that
            // recording such entries is intended.
            if (!test)
            {
                art.Add();
            }

            return art;
        }
    }
}