首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > .NET > .NET Framework >

Alexa自动采集器出现有关问题

2012-01-31 
Alexa自动采集器出现问题?前段时间做了一个Alexa流量数据自动采集工具,是用C# Winform做的,窗体上有一个We

Alexa自动采集器出现问题?
前段时间做了一个Alexa流量数据自动采集工具,是用C# Winform做的,窗体上有一个WebBrowser控件,窗体加载后,添加需要采集的网站列表,然后访问Http://cn.alexa.com采集,在这里采集到的数据是没问题的,接下来去http://alexa.chinaz.com采集IP和PV值的时候,经常会出现访问后是空白页,然后程序就会停在那里了,很奇怪是什么问题?

代码如下:

C# code
// ---------------------------------------------------------------------------
// State shared by the scraping handlers below.
// ---------------------------------------------------------------------------

// Exit marker: true = keep running, false = exit.
bool flagExit = true;

// Sites to scrape; the scraped values are written back onto each item.
List<Alexa> list = new List<Alexa>();

// Position in list during the cn.alexa.com pass.
int index = 0;

// Position in list during the alexa.chinaz.com pass.
int indexChinaz = 0;

// True while the cn.alexa.com pass is running.
bool startRecord = false;

// True while the alexa.chinaz.com pass is running.
bool startChinaz = false;

// Set to true when a scraping run starts. The original note says it is only
// true when the current hour is 13; the timer handler that would enforce that
// is not visible in this excerpt.
bool IsRecording = false;

// No longer used by SetAlexaInfo; kept because code outside this excerpt may
// still reference it.
StringBuilder sb = new StringBuilder();

// Document of the page that was parsed most recently.
HtmlDocument document = null;

// Text alexa.chinaz.com shows when it has no statistics for a domain.
private const string NoDataText = "相关数据不充分,无法统计。";

// Placeholder stored when a value is missing or cannot be parsed.
private const string MissingValue = "-";

/// <summary>
/// Fills the site list, enables the timer and loads a blank page.
/// </summary>
private void Form1_Load(object sender, EventArgs e)
{
    // Pause two seconds after the form has loaded (kept from the original).
    // NOTE(review): Thread.Sleep blocks the UI thread; consider a timer instead.
    System.Threading.Thread.Sleep(2000);

    list.Add(new Alexa { DomainName = "她时代", DomainUrl = "smartshe.com" });
    list.Add(new Alexa { DomainName = "elle", DomainUrl = "ellechina.com" });
    list.Add(new Alexa { DomainName = "onlylady", DomainUrl = "onlylady.com" });
    list.Add(new Alexa { DomainName = "pclady", DomainUrl = "pclady.com.cn" });
    list.Add(new Alexa { DomainName = "yoka", DomainUrl = "yoka.com" });
    list.Add(new Alexa { DomainName = "贝太厨房", DomainUrl = "bettyskitchen.com.cn" });
    list.Add(new Alexa { DomainName = "都市主妇", DomainUrl = "herschina.com" });
    list.Add(new Alexa { DomainName = "嘉人", DomainUrl = "marieclairechina.com" });
    list.Add(new Alexa { DomainName = "女友", DomainUrl = "ny1988.com" });
    list.Add(new Alexa { DomainName = "瑞丽", DomainUrl = "rayli.com.cn" });
    list.Add(new Alexa { DomainName = "时尚", DomainUrl = "trends.com.cn" });
    list.Add(new Alexa { DomainName = "悦己", DomainUrl = "self.com.cn" });

    timer1.Enabled = true;
    webBrowser1.Navigate(new Uri(@"about:blank"));
    tboCurUrl.Text = "about:blank";
}

/// <summary>
/// WebBrowser load-complete handler: parses the page that just finished
/// loading and requests the next one for whichever pass is active.
/// </summary>
private void webBrowser1_DocumentCompleted(object sender, WebBrowserDocumentCompletedEventArgs e)
{
    if (IsPageLoaded(e) && startChinaz)
    {
        SetIPPv();
        GetAlexaFromChinaz(indexChinaz);
    }

    if (IsPageLoaded(e) && startRecord)
    {
        // Store the Alexa ranking information, then move on to the next site.
        SetAlexaInfo();
        start(index);
    }
}

/// <summary>
/// Returns true when the completed URL equals the browser's current URL or
/// the browser reports ReadyState == Complete. Null URLs are treated as
/// "not equal" instead of throwing.
/// </summary>
private bool IsPageLoaded(WebBrowserDocumentCompletedEventArgs e)
{
    Uri currentUrl = webBrowser1.Url;
    bool sameUrl = e.Url != null
        && currentUrl != null
        && e.Url.ToString() == currentUrl.ToString();

    return sameUrl || webBrowser1.ReadyState == WebBrowserReadyState.Complete;
}

/// <summary>
/// Reads Alexa traffic information from the official site (cn.alexa.com).
/// </summary>
/// <param name="num">Index into list of the site to request.</param>
private void start(int num)
{
    if (num < list.Count)
    {
        timer1.Enabled = false;
        IsRecording = true;

        // Pause one second between requests (the original comment said three
        // seconds, but the code has always slept 1000 ms).
        System.Threading.Thread.Sleep(1000);

        string url = "http://cn.alexa.com/siteinfo/" + list[num].DomainUrl + "#trafficstats";
        webBrowser1.Navigate(new Uri(url));
        tboCurUrl.Text = url;
    }
    else
    {
        // Official-site pass finished: switch to reading IP / PV values.
        startRecord = false;
        startChinaz = true;
        GetAlexaFromChinaz(0);
    }
}

/// <summary>
/// Visits alexa.chinaz.com to obtain the IP and PV values.
/// </summary>
/// <param name="num">Index into list of the site to request.</param>
private void GetAlexaFromChinaz(int num)
{
    if (num < list.Count)
    {
        // Pause five seconds between requests.
        // NOTE(review): this blocks the UI thread while it sleeps.
        System.Threading.Thread.Sleep(5000);

        string url = "http://alexa.chinaz.com/?domain=" + list[num].DomainUrl;
        webBrowser1.Navigate(new Uri(url));
        tboCurUrl.Text = url;
    }
    else
    {
        // Both passes are done: export the results and reset for the next run.
        ExportToSql();
        timer1.Enabled = true;
        startRecord = false;
        startChinaz = false;
        index = 0;
        indexChinaz = 0;
        webBrowser1.Navigate(new Uri(@"http://www.smartshe.com"));
        tboCurUrl.Text = "http://www.smartshe.com";
    }
}

/// <summary>
/// Stores the IP and PV values of the current chinaz page on the current
/// list item and advances indexChinaz. A blank or incomplete page yields "-"
/// for the affected value instead of throwing.
/// </summary>
private void SetIPPv()
{
    // DocumentCompleted may fire again after the index has moved past the end.
    if (indexChinaz < 0 || indexChinaz >= list.Count)
    {
        return;
    }

    document = webBrowser1.Document;
    list[indexChinaz].IpNum = ReadChinazCounter("IpNum");
    list[indexChinaz].PvNum = ReadChinazCounter("PvNum");
    indexChinaz += 1;
}

/// <summary>
/// Reads one counter element from the current document: drops the first two
/// characters and all commas. Returns "-" when the element is missing, shows
/// the "no data" text, or is too short to contain a value.
/// </summary>
private string ReadChinazCounter(string elementId)
{
    string text = GetElementText(elementId);
    if (text == NoDataText || text.Length < 2)
    {
        return MissingValue;
    }

    return text.Substring(2).Replace(",", "");
}

/// <summary>
/// Returns the InnerText of the element with the given id in the current
/// document, or an empty string when the document, the element or its text
/// is missing (for example on a blank page).
/// </summary>
private string GetElementText(string elementId)
{
    if (document == null)
    {
        return string.Empty;
    }

    HtmlElement element = document.GetElementById(elementId);
    if (element == null || element.InnerText == null)
    {
        return string.Empty;
    }

    return element.InnerText;
}

/// <summary>
/// Stores the ranking information of the current cn.alexa.com page on the
/// current list item and advances index. Values that cannot be found are
/// stored as "-".
/// </summary>
private void SetAlexaInfo()
{
    // DocumentCompleted may fire again after the index has moved past the end.
    if (index < 0 || index >= list.Count)
    {
        return;
    }

    document = webBrowser1.Document;
    Alexa site = list[index];

    // Yesterday / last seven days / last month rankings.
    string rankText = GetElementText("rank");
    site.TodayRank = ExtractNumber(rankText, @"昨日([\d,]+)");
    site.WeekRank = ExtractNumber(rankText, @"最近七天平均([\d,]+)");
    site.MonthRank = ExtractNumber(rankText, @"最近一月平均([\d,]+)");

    // Overall ranking; line breaks are removed before matching.
    string statsText = GetElementText("siteStats").Replace("\r\n", "");
    site.Rank = ExtractNumber(statsText, @"([\d,]+)网站流量排名");

    // Ranking within China.
    string trafficText = GetElementText("trafficstats");
    site.ChinaRank = ExtractNumber(trafficText, @"([\d,]+)   China");

    index += 1;
}

/// <summary>
/// Returns the first capture group of pattern in text with commas removed,
/// or "-" when the pattern does not match.
/// </summary>
private static string ExtractNumber(string text, string pattern)
{
    Match match = Regex.Match(text, pattern, RegexOptions.IgnoreCase);
    return match.Success ? match.Groups[1].Value.Replace(",", "") : MissingValue;
}

/// <summary>
/// Inserts every collected result into the database.
/// </summary>
private void ExportToSql()
{
    foreach (Alexa alexa in list)
    {
        DBHelp.ExecuteNonQuery(alexa);
    }
}

/// <summary>
/// Navigates to the address typed into tboCurUrl, prefixing "http://" when
/// no http/https scheme is present. Empty or invalid input is ignored
/// instead of throwing a UriFormatException.
/// </summary>
private void GoUrl()
{
    string address = tboCurUrl.Text.Trim();
    if (address.Length == 0)
    {
        return;
    }

    bool hasScheme = address.StartsWith("http://", StringComparison.OrdinalIgnoreCase)
        || address.StartsWith("https://", StringComparison.OrdinalIgnoreCase);
    if (!hasScheme)
    {
        address = "http://" + address;
    }

    Uri target;
    if (Uri.TryCreate(address, UriKind.Absolute, out target))
    {
        webBrowser1.Navigate(target);
    }
}

/// <summary>
/// Minimizes the form, hides it and shows the notify icon.
/// </summary>
private void NormalToMinimized()
{
    this.Visible = false;
    this.WindowState = FormWindowState.Minimized;
    notifyIcon1.Visible = true;
}

/// <summary>
/// Restores the form to its normal size and hides the notify icon.
/// </summary>
private void MinimizedToNormal()
{
    this.Visible = true;
    this.WindowState = FormWindowState.Normal;
    notifyIcon1.Visible = false;
}


请知道的一定留下答案,相当感谢了!

[解决办法]
/// <summary>
/// Stores the IP and PV values of the current chinaz page on the current
/// list item and advances indexChinaz. When the page is blank or an element
/// is missing, "-" is stored for that value instead of throwing a
/// NullReferenceException, so scraping can continue with the next site.
/// </summary>
private void SetIPPv()
{
    // DocumentCompleted may fire again after the index has moved past the end.
    if (indexChinaz < 0 || indexChinaz >= list.Count)
    {
        return;
    }

    document = webBrowser1.Document;

    string[] elementIds = { "IpNum", "PvNum" };
    string[] values = { "-", "-" };
    for (int i = 0; i < elementIds.Length; i++)
    {
        // GetElementById returns null when the page does not contain the element.
        HtmlElement element = document == null ? null : document.GetElementById(elementIds[i]);
        string text = element == null ? null : element.InnerText;

        // Keep "-" for missing text, the "no data" message, or text too short
        // to hold the two-character prefix that Substring(2) strips.
        if (text != null && text != "相关数据不充分,无法统计。" && text.Length >= 2)
        {
            values[i] = text.Substring(2).Replace(",", "");
        }
    }

    list[indexChinaz].IpNum = values[0];
    list[indexChinaz].PvNum = values[1];
    indexChinaz += 1;
}
空白页中不存在 IpNum 和 PvNum 元素,GetElementById 会返回 null,此时访问 InnerText 就会抛出 NullReferenceException;异常未处理,所以程序卡死。先判断元素是否为 null(或者加上 try catch)就可以了!

热点排行