求正则表达式循环提取链接,标题,时间!
<li><a href="链接"alt="标题"><img alt="标题" src="c.gif" width="6" height="7"/>标题</a><span class="list_content_time">时间</span><span class="list_content_read">点击</span></li><li><a href="链接"alt="标题"><img alt="标题" src="c.gif" width="6" height="7"/>标题</a><span class="list_content_time">时间</span><span class="list_content_read">点击</span></li><li><a href="链接"alt="标题"><img alt="标题" src="c.gif" width="6" height="7"/>标题</a><span class="list_content_time">时间</span><span class="list_content_read">点击</span></li><li><a href="链接"alt="标题"><img alt="标题" src="c.gif" width="6" height="7"/>标题</a><span class="list_content_time">时间</span><span class="list_content_read">点击</span></li>
// Scans li.OuterHtml for anchor tags and, for every match, reads the href value
// (named group "url") and the anchor's inner content (named group "text").
//
// Pattern walkthrough (inside the verbatim string, "" denotes one literal double quote):
//   (?is)                      - inline options: case-insensitive, and '.' also matches newlines
//   <a(?:(?!href=).)*href=     - "<a", then any characters up to the first "href="
//   (['""]?)                   - optional opening quote, captured as group 1
//   (?<url>[^""\s>]*)          - the URL: characters other than a double quote, whitespace or '>'
//   \1[^>]*>                   - the same quote again (or nothing), then the rest of the opening tag
//   (?<text>(?:(?!</?a\b).)*)  - content up to the next "<a" / "</a" tag
//   </a>                       - the closing anchor tag
//
// NOTE(review): `li` and `Type(...)` are not defined in this snippet; what `Type` does with
// the captured text cannot be determined from here - confirm against its definition.
// NOTE(review): `url` and `c` are assigned but not used anywhere in the visible code.
// NOTE(review): the final '}' has no matching '{' in this snippet; presumably it closes an
// enclosing block that starts outside the visible code - confirm.
Regex reg = new Regex(@"(?is)<a(?:(?!href=).)*href=(['""]?)(?<url>[^""\s>]*)\1[^>]*>(?<text>(?:(?!</?a\b).)*)</a>"); MatchCollection mc = reg.Matches(li.OuterHtml); foreach (Match m in mc) { string url = m.Groups["url"].Value; string c = Type(m.Groups["text"].Value).ToString(); } }
// Sample markup: four <li> entries, each holding a link (with an <img> and the title text),
// a "list_content_time" span and a "list_content_read" span.
string s = @"<li><a href=""链接""alt=""标题""><img alt=""标题"" src=""c.gif"" width=""6"" height=""7""/>标题</a><span class=""list_content_time"">时间</span><span class=""list_content_read"">点击</span></li><li><a href=""链接""alt=""标题""><img alt=""标题"" src=""c.gif"" width=""6"" height=""7""/>标题</a><span class=""list_content_time"">时间</span><span class=""list_content_read"">点击</span></li><li><a href=""链接""alt=""标题""><img alt=""标题"" src=""c.gif"" width=""6"" height=""7""/>标题</a><span class=""list_content_time"">时间</span><span class=""list_content_read"">点击</span></li><li><a href=""链接""alt=""标题""><img alt=""标题"" src=""c.gif"" width=""6"" height=""7""/>标题</a><span class=""list_content_time"">时间</span><span class=""list_content_read"">点击</span></li>";

// One match per <li> item. Named groups:
//   href  - text between href=" and the next double quote
//   title - text between the self-closed <img .../> and </a>
//   time  - content of the first <span> after the link
// The trailing .*?</li> consumes the remainder of the item (the second span).
// (?is) makes the match case-insensitive and lets '.' match newlines.
Regex itemPattern = new Regex(@"(?is)<li><a href=""(?<href>.*?)""[^>]*><img[^>]*/>(?<title>.*?)</a><span[^>]*>(?<time>.*?)</span>.*?</li>");
MatchCollection matches = itemPattern.Matches(s);

// Emit link and title each followed by a line break, then the time followed by a rule.
for (int i = 0; i < matches.Count; i++)
{
    GroupCollection groups = matches[i].Groups;
    Response.Write(groups["href"].Value + "<br/>");
    Response.Write(groups["title"].Value + "<br/>");
    Response.Write(groups["time"].Value + "<hr/>");
}