请教一个嵌套正则表达式
本帖最后由 Kristd 于 2013-12-18 17:44:23 编辑
小弟想抓取以下html文件的两个table
table里面有可能嵌套了子table
我想用Regex对象匹配完后存放到MatchCollection对象里面
然后再通过index从MatchCollection里面把每个table单独读出来
所以匹配的结果应该是这样的:
<table cellpadding=0 cellspacing=0>
<tr>
<td width=68 height=52 bgcolor=white >
<table cellpadding=0 cellspacing=0 width="100%">
<tr>
<td>
<p>...</p>
</td>
</tr>
</table>
</td>
</tr>
</table>
<p >手持量筒量取一定体积的液体</p>
<p >用托盘天平称量5.65g食盐</p>
<table border=1 cellspacing=0 cellpadding=0
>
<tr >
<td width=197 valign=top >
<p>A</p>
</td>
<td width=262 valign=top >
<p>B</p>
</td>
</tr>
</table>
<table cellpadding=0 cellspacing=0>
<tr>
<td width=68 height=52 bgcolor=white >
<table cellpadding=0 cellspacing=0 width="100%">
<tr>
<td> ...
</td>
</tr>
</table>
</td>
</tr>
</table>
<table border=1 cellspacing=0 cellpadding=0
>
<tr >
<td width=197 valign=top >
<p>...</p>
</td>
<td width=262 valign=top >
<p>...</p>
</td>
</tr>
</table>
// Build a Regex from the pattern string strRx. RegexOptions.Singleline makes '.'
// match newline characters too, so a single match may span several lines.
Regex ex = new Regex(strRx, RegexOptions.Singleline);
// Run the pattern over strInput and keep all matches; each one can then be read by index.
MatchCollection matchCollection = ex.Matches(strInput);
// My attempt:
// Result: it matched everything..
// NOTE(review): the greedy [\s\S]* right after the first opening tag appears to run on to
// the last </table>, and the balancing-group section is wrapped in (...)* so it may match
// zero times, leaving (?(Open)(?!)) trivially satisfied — confirm before reusing this pattern.
//<table[^>]*>[\s\S]*(((?'Open'<table[^>]*>)[\s\S]*)+((?'-Open'</table>)[\s\S]*)+)*(?(Open)(?!))</table>
[解决办法]
MatchCollection mc1 = Regex.Matches(html, @"(?is)<table|</table");
// Extract every top-level <table>...</table> element from html, keeping any nested
// tables inside their parent. mc1 is expected to hold the "<table" and "</table"
// tokens of html in document order (assumption: it was produced by a
// case-insensitive match for exactly those two tokens — confirm against the caller).
// Each extracted table is stored in tb, so it can be read back by index, and is
// also written to the console.
List<string> tb = new List<string>();
int open = 0, index = 0; // open: current nesting depth; index: start of the current outermost table
foreach (Match m in mc1)
{
    if (string.Equals(m.Value, "<table", StringComparison.OrdinalIgnoreCase))
    {
        // Remember the start position only for the outermost opener; a nested
        // <table must not overwrite it, or the extracted text would begin at
        // the inner table instead of the outer one.
        if (open == 0)
        {
            index = m.Index;
        }
        open++;
    }
    else if (string.Equals(m.Value, "</table", StringComparison.OrdinalIgnoreCase))
    {
        if (open == 0)
        {
            // Closing tag without a matching opener: skip it so the depth never
            // goes negative and a later opener cannot appear to close a table.
            continue;
        }
        open--;
        if (open == 0)
        {
            // The token excludes the closing '>', so locate it explicitly rather
            // than assuming the tag is exactly 8 characters ("</table>").
            int close = html.IndexOf('>', m.Index);
            int end = close < 0 ? html.Length : close + 1;
            string table = html.Substring(index, end - index);
            tb.Add(table);
            Console.WriteLine(table);
        }
    }
}