关于html提取的问题
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.0//EN" "http://www.w3.org/TR/REC-html40/strict.dtd">
<html><head><meta name="qrichtext" content="1" /><style type="text/css">
p, li { white-space: pre-wrap; }
</style></head><body style=" font-family:'SimSun'; font-size:9pt; font-weight:400; font-style:normal;">
<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#0000ff;">11 11:43:08</span></p>
<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#0000ff;"> </span><img src="cf0752613fd889b4751675aa1e57896f.gif" /></p>
<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#0000ff;">11 11:43:15</span></p>
<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#0000ff;"> </span><img src="8e4138a533820d7f20613c80e6246da5.png" /></p>
<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#0000ff;">11 11:43:40</span></p>
<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#0000ff;"> </span>是一个号人哦<span style=" font-family:'华文琥珀'; font-size:15pt; font-weight:600;">阿斯蒂芬</span><img src="eca44343203d16f0cf735c8f61dbf720.gif" /><img src="a73bc009bfc6931f45708be384b1ced3.png" /></p>
<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#008000;">11 11:43:51</span></p>
<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#008000;"> </span><img src="031c26b11a497939e939561320eb991d.gif" /></p>
<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#008000;">11 11:44:19</span></p>
<p style=" margin-top:12px; margin-bottom:12px; margin-left:0px; margin-right:0px; -qt-block-indent:0; text-indent:0px;"><span style=" color:#008000;"> </span>111111111111111111111111111asdfasdf<img src="d21ca545587b5aa5359edd209a903aaa.gif" /><img src="680d34c92e4c2e686555af3a7aa249b4.png" /><img src="a1e03d82b24559aa2003937e2c1e10ae.png" /></p></body></html>
QString pattern("<img src="(.*)" />");
QRegExp rx(pattern);
rx.setMinimal(true);
int pos = string.indexOf(rx);
QStringList list = rx.capturedTexts();
[解决办法]
参考下这例子试试
QString str = "offsets: 1.23 .50 71.00 6.00";
QRegExp rx("\\d*\\.\\d+"); // primitive floating point matching
int count = 0;
int pos = 0;
while ((pos = rx.indexIn(str, pos)) != -1) {
++count;
pos += rx.matchedLength();
}
// pos will be 9, 14, 18 and finally 24; count will end up as 4