求助高手,不会用正则表达式-
在网页源代码里提取下面的内容
1.根据title提取“法兰克福”和“奥格斯堡”
源代码是:
<title>法兰克福 VS 奥格斯堡 (德甲2013/2014) - 百家欧赔 - 500彩票网</title>
2.根据game_time和odds_hd_bf,提取“2013-12-21 03:30”和“1:1”
源代码是:
<p class="game_time">比赛时间2013-12-21 03:30</p>
<p class="odds_hd_bf"><strong>1:1</strong></p>
3.提取“40.13”和“28.14”和“31.72”这三个数值,这个需要先从众多的代码中截取“平均值”和“下载”之间的代码吧,然后逐步缩小范围对么
源代码是:
...
<td row="1">平均值</td>
<td>
<ul class="tb_tdul td_show_cp" style="display:none;">
<li row="1">2.31</li><li row="1">3.29</li><li row="1">2.92</li>
</ul>
<ul class="tb_tdul">
<li row="1">2.28</li><li row="1">3.32</li><li row="1">3.03</li>
</ul>
</td>
<td><ul class="tb_tdul td_show_cp" style="display:none;">
<li row="1">40.13%</li><li row="1">28.14%</li><li row="1">31.72%</li>
</ul>
<ul class="tb_tdul">
<li row="1">40.95%</li><li row="1">28.16%</li><li row="1">30.89%</li>
</ul>
</td>
<td><p class="td_show_cp" row="1" style="display:none;">92.48%</p><p
class="tb_tdul"><span row="1">93.35%</span></p></td>
<td>
<ul class="tb_tdul td_show_cp" style="display:none;">
<li row="1">0.95</li><li row="1">0.93</li><li row="1">0.90</li>
</ul>
<ul class="tb_tdul" first="0.95|0.93|0.90">
<li row="1">0.93</li><li row="1">0.94</li><li row="1">0.93</li>
</ul>
</td>
<td rowspan="4"><a class="btn_blue_l btn_blue_l_h24 btn_width_auto tb_xiazai_btn"
id="downpl" href="javascript:void(0)">下载</a></td>
...
新年了,祝大家新年快乐!
[解决办法]
\<title[^\>]*\>\s*(?<Title>.*?)\s*\</title\>
[解决办法]
http://my.oschina.net/okimbin/blog/109711
C#正则表达式获取html标签之间的内容
[解决办法]
// Extract the two team names from a page title shaped like
// "<team A> VS <team B> (<league>) - ...".
string s = "<title>法兰克福 VS 奥格斯堡 (德甲2013/2014) - 百家欧赔 - 500彩票网</title>";

// (?:\w|\W)*? lazily matches any character (including line breaks) up to the
// first closing </title> tag; the text is captured in the named group "title".
Regex reg = new Regex(
    @"(?m)<title[^>]*>(?<title>(?:\w|\W)*?)</title[^>]*>",
    RegexOptions.Multiline | RegexOptions.IgnoreCase);
Match mc = reg.Match(s);

// NOTE(review): m_title is not declared in this snippet; presumably it is a
// string field declared elsewhere -- confirm before reuse.
if (mc.Success)
{
    m_title = mc.Groups["title"].Value.Trim();
}

// The title is split on single spaces: index 0 is the first team, index 1 is
// the literal "VS", index 2 is the second team.
string[] title = m_title.Split(' ');
string t1 = title[0];
string t2 = title[2];
/// <summary>
/// Converts an HTML fragment to plain text: removes script blocks, tags and
/// comment remnants, then decodes a small fixed set of character entities.
/// </summary>
/// <param name="fHtmlString">The HTML text to clean. May be null or empty.</param>
/// <returns>
/// The text with markup removed; an empty string when the input is null or empty.
/// </returns>
public static string NoHTML(string fHtmlString)
{
    if (string.IsNullOrEmpty(fHtmlString))
    {
        return string.Empty;
    }

    // Remove scripts. Singleline lets '.' match line breaks so that script
    // bodies spanning several lines are removed as well.
    fHtmlString = Regex.Replace(fHtmlString, @"<script[^>]*?>.*?</script>", "", RegexOptions.IgnoreCase | RegexOptions.Singleline);
    // Remove HTML tags.
    fHtmlString = Regex.Replace(fHtmlString, @"<(.[^>]*)>", "", RegexOptions.IgnoreCase);
    // Remove a line break together with the whitespace that follows it.
    fHtmlString = Regex.Replace(fHtmlString, @"([\r\n])[\s]+", "", RegexOptions.IgnoreCase);
    // Remove leftover comment terminators and unterminated comment openers
    // (from the opener to the end of the line).
    fHtmlString = Regex.Replace(fHtmlString, @"-->", "", RegexOptions.IgnoreCase);
    fHtmlString = Regex.Replace(fHtmlString, @"<!--.*", "", RegexOptions.IgnoreCase);

    // Strip stray angle brackets and CRLF pairs left over from the markup.
    // string.Replace returns a new string, so the result must be assigned.
    // This runs before entity decoding so that a decoded &lt; / &gt; is kept.
    fHtmlString = fHtmlString.Replace("<", "");
    fHtmlString = fHtmlString.Replace(">", "");
    fHtmlString = fHtmlString.Replace("\r\n", "");

    // Decode the supported named / numeric character entities.
    fHtmlString = Regex.Replace(fHtmlString, @"&(quot|#34);", "\"", RegexOptions.IgnoreCase);
    fHtmlString = Regex.Replace(fHtmlString, @"&(amp|#38);", "&", RegexOptions.IgnoreCase);
    fHtmlString = Regex.Replace(fHtmlString, @"&(lt|#60);", "<", RegexOptions.IgnoreCase);
    fHtmlString = Regex.Replace(fHtmlString, @"&(gt|#62);", ">", RegexOptions.IgnoreCase);
    fHtmlString = Regex.Replace(fHtmlString, @"&(nbsp|#160);", " ", RegexOptions.IgnoreCase);
    fHtmlString = Regex.Replace(fHtmlString, @"&(iexcl|#161);", "\u00A1", RegexOptions.IgnoreCase);
    fHtmlString = Regex.Replace(fHtmlString, @"&(cent|#162);", "\u00A2", RegexOptions.IgnoreCase);
    fHtmlString = Regex.Replace(fHtmlString, @"&(pound|#163);", "\u00A3", RegexOptions.IgnoreCase);
    fHtmlString = Regex.Replace(fHtmlString, @"&(copy|#169);", "\u00A9", RegexOptions.IgnoreCase);
    // Drop any remaining numeric entities that were not decoded above.
    fHtmlString = Regex.Replace(fHtmlString, @"&#(\d+);", "", RegexOptions.IgnoreCase);

    return fHtmlString;
}
// Sample input: the two paragraphs holding the match time and the score.
// The inner double quotes must be escaped inside a regular string literal.
string s = "<p class=\"game_time\">比赛时间2013-12-21 03:30</p><p class=\"odds_hd_bf\"><strong>1:1</strong></p>";
// Strip the markup so that only the text content of the paragraphs remains.
string ss = NoHTML(s);