正则表达式匹配
<div class="sclist">
<dl>
<dt><a href="/tang/0608/27/ad3772c9d45a89d21230d6675a7141dc.html" title="送朱大入秦" target="_blank">送朱大入秦</a> <span>五绝 </span></dt>
<dd class="preview">游人五陵去,宝剑值千金。分手脱相赠,平生一片心。</dd>
<dd class="info">唐·<span>孟浩然</span> <a href="/tang/0608/27/ad3772c9d45a89d21230d6675a7141dc.html" class="info2">查阅全部...</a></dd>
</dl>
<dl>
<dt><a href="/tang/0805/26/41ce0cc6bdf523fdf3b129e17293cf33.html" title="送元二使安西" target="_blank">送元二使安西</a> <span>七绝 送别</span></dt>
<dd class="preview">渭城朝雨浥轻尘,客舍青青柳色新。劝君更尽一杯酒,西出阳关无故人。</dd>
<dd class="info">唐·<span>王维</span> <a href="/tang/0805/26/41ce0cc6bdf523fdf3b129e17293cf33.html" class="info2">查阅全部...</a></dd>
</dl>
</div>
// Extracts the link URL, link text and category from every <dt> entry in yourStr
// and appends them to richTextBox2, one value per line, with a dashed separator
// line after each entry.
//
// Pattern notes:
//   (?is)          - case-insensitive; '.' also matches newlines.
//   <a\s+href="…"  - href must be the first attribute of the <a> tag.
//   (?<url>…)      - href value: any run without quotes, whitespace or '>'.
//   (?<text>…)     - everything up to the closing </a> (lazy).
//   (?<type>…)     - contents of the <span> that follows the link (lazy).
Regex reg = new Regex(@"(?is)<dt><a\s+href=""(?<url>[^'""\s>]*)""[^>]*>(?<text>.*?)</a>\s*<span>(?<type>.*?)</span></dt>");
MatchCollection mc = reg.Matches(yourStr);

// Accumulate the output first: doing `Text +=` per field re-reads and re-assigns
// the control's whole text on every iteration.
System.Text.StringBuilder output = new System.Text.StringBuilder();
foreach (Match m in mc)
{
    output.Append(m.Groups["url"].Value).Append("\n");
    output.Append(m.Groups["text"].Value).Append("\n");
    output.Append(m.Groups["type"].Value).Append("\n-----------------------\n");
}

// Leave the control untouched when nothing matched, as the per-match loop did.
if (output.Length > 0)
{
    richTextBox2.Text += output.ToString();
}
// Reads the saved page from disk (decoded as GB2312) and prints the URL, link text
// and category of every <dt> entry to the console, one entry per line.
string pageSource = File.ReadAllText(@"C:\1.txt", Encoding.GetEncoding("GB2312"));

// (?is): case-insensitive, '.' matches newlines. href may appear anywhere inside
// the <a> tag; the named groups url / text / type carry the three extracted fields.
const string entryPattern = @"(?is)<dt><a\b[^>]*?href=""(?<url>[^'""\s>]*)""[^>]*>(?<text>.*?)</a>\s*<span>(?<type>.*?)</span></dt>";

// Walk the matches one at a time instead of materialising a MatchCollection.
Match entry = Regex.Match(pageSource, entryPattern);
while (entry.Success)
{
    GroupCollection fields = entry.Groups;
    Console.WriteLine(
        "url:{0},text: {1},type: {2}",
        fields["url"].Value,
        fields["text"].Value,
        fields["type"].Value);
    entry = entry.NextMatch();
}