网页表格信息抓取
页面源代码如下:
<!DOCTYPE HTML PUBLIC "-//W3C//DTD HTML 4.01 Transitional//EN" "http://www.w3c.org/TR/1999/REC-html401-19991224/loose.dtd">
<HTML><HEAD><TITLE></TITLE>
<META content="text/html; charset=GBK" http-equiv=Content-Type>
<META name=GENERATOR content="MSHTML 8.00.7601.18106"></HEAD>
<BODY>
<FORM method=post name=pusManageForm action=pus.do><INPUT type=hidden
name=method> <INPUT value=15647695 type=hidden name=sid> <INPUT value=2
type=hidden name=partCount>
<TABLE width="100%" align=center>
<TBODY>
<TR>
<TD>
<TABLE border=0 width="100%">
<TBODY>
<TR>
<TD width=10> </TD>
<TD>
<TABLE border=0 cellSpacing=1 cellPadding=0 width="95%"
align=center>
<TBODY>
<TR>
<TD height=40 align=left><B><FONT color=rgb(0,0,20)
size=2>aaaaaa</FONT></B> <BR><B><FONT
color=rgb(0,0,20) size=2>aaaaaa</FONT></B> </TD></TR>
<TR>
<TD height=40 align=left><FONT color=rgb(0,0,20)
size=1>aaaaaa</FONT> <FONT color=rgb(0,0,20)
size=1>xxxx(aaaaaa)</FONT> </TD></TR>
<TR>
<TD align=left><FONT color=rgb(0,0,20) size=1>aaaaaa</FONT> <FONT color=rgb(0,0,20) size=1>xxxx</FONT> </TD></TR>
<TR>
<TD align=left><FONT color=rgb(0,0,20) size=1>adress
aaaaaa</FONT> <FONT color=rgb(0,0,20) size=1>adress</FONT>
</TD></TR></TBODY></TABLE></TD>
<TD align=middle><IMG
src="http://aaa/DY/images/pus/DYNP_new.jpg">
</TD>
<TD align=right>
<TABLE border=0 cellSpacing=1 cellPadding=1 width="95%"
align=center>
<TBODY>
<TR>
<TD align=right><B><FONT color=rgb(0,0,20)
size=4>交货计划单</FONT></B> </TD></TR>
<TR>
<TD align=right><FONT color=rgb(0,0,20)
size=1>计划到达时间 2013-09-16 </FONT></TD></TR>
<TR>
<TD align=right><FONT color=rgb(0,0,20) size=2>PUS编号
770266110 版本00 </FONT></TD></TR>
<TR>
<TD align=right><FONT color=rgb(0,0,20) size=3>Customer 客户
</FONT></TD></TR>
<TR>
<TD noWrap align=right><FONT size=+3
face=C39P36DmTt color="blue">*DYNP-770266110-00*</FONT>
</TD></TR></TBODY></TABLE></TD>
<TD width=10> </TD>
<TR></TR></TBODY></TABLE><BR>
<TABLE border=1 width="95%" align=center>
<TBODY>
<TR class=table_head>
<TD colSpan=4 align=left><B><FONT>Delivery Information
交货信息</FONT></B> </TD></TR>
<TR>
<TD size="20%"><B><FONT color=rgb(0,0,255)>工厂<BR>Plant</FONT></B>
</TD>
<TD colSpan=3>xxxxxx
</TD></TR>
<TR class=table_c2>
<TD size="20%"><B><FONT color=rgb(0,0,255)>取货时间<BR>Pick Up
Time</FONT></B> </TD>
<TD size="30%"><font color = "blue">2013-09-09 16:30 </font></TD>
<TD size="20%"><B><FONT color=rgb(0,0,255)>需要供应商反馈<BR>Need Duns
Response</FONT></B> </TD>
<TD size="30%">N </TD></TR>
<TR>
<TD size="20%"><B><FONT color=rgb(0,0,255)>交货日期<BR>Delivery
Date</FONT></B> </TD>
<TD size="30%">2013-09-16 </TD>
<TD size="20%"><B><FONT color=rgb(0,0,255)>窗口时间<BR>Window
Time</FONT></B> </TD>
<TD size="30%">16:30 </TD></TR>
<TR class=table_c2>
<TD><B><FONT color=rgb(0,0,255)>卸货口<BR>Dock</FONT></B> </TD>
<TD>CC-70D </TD>
<TD><B><FONT color=rgb(0,0,255)>卸货口负责人<BR>Dock Incharger</FONT></B>
</TD>
<TD>kkk</TD></TR>
<TR>
<TD><B><FONT color=rgb(0,0,255)>卸货口电话<BR>Dock Tel</FONT></B> </TD>
<TD>011-1111 </TD>
<TD><B><FONT color=rgb(0,0,255)>卸货口地址<BR>Dock Address</FONT></B> </TD>
<TD>adress </TD></TR>
<TR class=table_c2>
<TD size="20%"><B><FONT color=rgb(0,0,255)>交货地点<BR>Delivery
Place</FONT></B> </TD>
<TD colSpan=3></TD></TR>
<TR>
<TD><B><FONT color=rgb(0,0,255)>计划跟踪员<BR>Follow Up</FONT></B> </TD>
<TD>kkkk </TD>
<TD><B><FONT color=rgb(0,0,255)>计划跟踪员电话/传真<BR>FollowUp
Tel/Fax</FONT></B> </TD>
<TD>011-1111</TD></TR>
<TR class=table_c2>
<TD><B><FONT color=rgb(0,0,255)>交货说明<BR>Delivery Note</FONT></B> </TD>
<TD colSpan=3></TD></TR></TBODY></TABLE><BR>
<TABLE border=1 width="95%" align=center>
<TBODY>
<TR class=table_head>
<TD colSpan=14 align=left><B><FONT>Part Information 零件清单</FONT></B>
</TD></TR>
<TR>
<TD align=middle><FONT color=rgb(0,0,0)>序号</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>零件号</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>零件说明</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>需求数量</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>承诺数量</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>实收数量</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>包装数</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>料箱数</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>料箱号</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>实发料箱号</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>实发料箱数</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>实收料箱号</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>实收料箱数</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>备注</FONT></TD></TR>
<TR>
<TD align=middle>1</TD>
<TD align=middle><font color = "blue">12647212</font></TD>
<TD align=middle></TD>
<TD align=middle><font color = "blue">60</font></TD>
<TD align=middle>60 </TD>
<TD align=middle></TD>
<TD align=middle><font color = "blue">15</font></TD>
<TD align=middle><font color = "blue">4</font></TD>
<TD align=middle><font color = "blue">P000000D</font></TD>
<TD align=middle></TD>
<TD align=middle></TD>
<TD align=middle></TD>
<TD align=middle></TD>
<TD align=middle></TD></TR>
<TR class=table_c2>
<TD align=middle>2</TD>
<TD align=middle><font color = "blue">12654172</font></TD>
<TD align=middle></TD>
<TD align=middle><font color = "blue">615</font></TD>
<TD align=middle>615 </TD>
<TD align=middle></TD>
<TD align=middle><font color = "blue">15</font></TD>
<TD align=middle><font color = "blue">41</font></TD>
<TD align=middle><font color = "blue">P000000D</font></TD>
<TD align=middle></TD>
<TD align=middle></TD>
<TD align=middle></TD>
<TD align=middle></TD>
<TD align=middle></TD></TR></TBODY></TABLE><BR>
</FORM>
</BODY></HTML>
<?php
// Sample markup copied from the page to be scraped: a header table, the
// "Delivery Information" table and the "Part Information" table.
$string = '<TABLE width="100%" align=center>
<TBODY>
<TR>
<TD>
<TABLE border=0 width="100%">
<TBODY>
<TR>
<TD width=10> </TD>
<TD>
<TABLE border=0 cellSpacing=1 cellPadding=0 width="95%"
align=center>
<TBODY>
<TR>
<TD height=40 align=left><B><FONT color=rgb(0,0,20)
size=2>aaaaaa</FONT></B> <BR><B><FONT
color=rgb(0,0,20) size=2>aaaaaa</FONT></B> </TD></TR>
<TR>
<TD height=40 align=left><FONT color=rgb(0,0,20)
size=1>aaaaaa</FONT> <FONT color=rgb(0,0,20)
size=1>xxxx(aaaaaa)</FONT> </TD></TR>
<TR>
<TD align=left><FONT color=rgb(0,0,20) size=1>aaaaaa</FONT> <FONT color=rgb(0,0,20) size=1>xxxx</FONT> </TD></TR>
<TR>
<TD align=left><FONT color=rgb(0,0,20) size=1>adress
aaaaaa</FONT> <FONT color=rgb(0,0,20) size=1>adress</FONT>
</TD></TR></TBODY></TABLE></TD>
<TD align=middle><IMG
src="http://aaa/DY/images/pus/DYNP_new.jpg">
</TD>
<TD align=right>
<TABLE border=0 cellSpacing=1 cellPadding=1 width="95%"
align=center>
<TBODY>
<TR>
<TD align=right><B><FONT color=rgb(0,0,20)
size=4>交货计划单</FONT></B> </TD></TR>
<TR>
<TD align=right><FONT color=rgb(0,0,20)
size=1>计划到达时间 2013-09-16 </FONT></TD></TR>
<TR>
<TD align=right><FONT color=rgb(0,0,20) size=2>PUS编号
770266110 版本00 </FONT></TD></TR>
<TR>
<TD align=right><FONT color=rgb(0,0,20) size=3>Customer 客户
</FONT></TD></TR>
<TR>
<TD noWrap align=right><FONT size=+3
face=C39P36DmTt color="blue">*DYNP-770266110-00*</FONT>
</TD></TR></TBODY></TABLE></TD>
<TD width=10> </TD>
<TR></TR></TBODY></TABLE><BR>
<TABLE border=1 width="95%" align=center>
<TBODY>
<TR class=table_head>
<TD colSpan=4 align=left><B><FONT>Delivery Information
交货信息</FONT></B> </TD></TR>
<TR>
<TD size="20%"><B><FONT color=rgb(0,0,255)>工厂<BR>Plant</FONT></B>
</TD>
<TD colSpan=3>xxxxxx
</TD></TR>
<TR class=table_c2>
<TD size="20%"><B><FONT color=rgb(0,0,255)>取货时间<BR>Pick Up
Time</FONT></B> </TD>
<TD size="30%"><font color = "blue">2013-09-09 16:30 </font></TD>
<TD size="20%"><B><FONT color=rgb(0,0,255)>需要供应商反馈<BR>Need Duns
Response</FONT></B> </TD>
<TD size="30%">N </TD></TR>
<TR>
<TD size="20%"><B><FONT color=rgb(0,0,255)>交货日期<BR>Delivery
Date</FONT></B> </TD>
<TD size="30%">2013-09-16 </TD>
<TD size="20%"><B><FONT color=rgb(0,0,255)>窗口时间<BR>Window
Time</FONT></B> </TD>
<TD size="30%">16:30 </TD></TR>
<TR class=table_c2>
<TD><B><FONT color=rgb(0,0,255)>卸货口<BR>Dock</FONT></B> </TD>
<TD>CC-70D </TD>
<TD><B><FONT color=rgb(0,0,255)>卸货口负责人<BR>Dock Incharger</FONT></B>
</TD>
<TD>kkk</TD></TR>
<TR>
<TD><B><FONT color=rgb(0,0,255)>卸货口电话<BR>Dock Tel</FONT></B> </TD>
<TD>011-1111 </TD>
<TD><B><FONT color=rgb(0,0,255)>卸货口地址<BR>Dock Address</FONT></B> </TD>
<TD>adress </TD></TR>
<TR class=table_c2>
<TD size="20%"><B><FONT color=rgb(0,0,255)>交货地点<BR>Delivery
Place</FONT></B> </TD>
<TD colSpan=3></TD></TR>
<TR>
<TD><B><FONT color=rgb(0,0,255)>计划跟踪员<BR>Follow Up</FONT></B> </TD>
<TD>kkkk </TD>
<TD><B><FONT color=rgb(0,0,255)>计划跟踪员电话/传真<BR>FollowUp
Tel/Fax</FONT></B> </TD>
<TD>011-1111</TD></TR>
<TR class=table_c2>
<TD><B><FONT color=rgb(0,0,255)>交货说明<BR>Delivery Note</FONT></B> </TD>
<TD colSpan=3></TD></TR></TBODY></TABLE><BR>
<TABLE border=1 width="95%" align=center>
<TBODY>
<TR class=table_head>
<TD colSpan=14 align=left><B><FONT>Part Information 零件清单</FONT></B>
</TD></TR>
<TR>
<TD align=middle><FONT color=rgb(0,0,0)>序号</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>零件号</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>零件说明</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>需求数量</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>承诺数量</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>实收数量</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>包装数</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>料箱数</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>料箱号</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>实发料箱号</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>实发料箱数</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>实收料箱号</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>实收料箱数</FONT></TD>
<TD align=middle><FONT color=rgb(0,0,0)>备注</FONT></TD></TR>
<TR>
<TD align=middle>1</TD>
<TD align=middle><font color = "blue">12647212</font></TD>
<TD align=middle></TD>
<TD align=middle><font color = "blue">60</font></TD>
<TD align=middle>60 </TD>
<TD align=middle></TD>
<TD align=middle><font color = "blue">15</font></TD>
<TD align=middle><font color = "blue">4</font></TD>
<TD align=middle><font color = "blue">P000000D</font></TD>
<TD align=middle></TD>
<TD align=middle></TD>
<TD align=middle></TD>
<TD align=middle></TD>
<TD align=middle></TD></TR>
<TR class=table_c2>
<TD align=middle>2</TD>
<TD align=middle><font color = "blue">12654172</font></TD>
<TD align=middle></TD>
<TD align=middle><font color = "blue">615</font></TD>
<TD align=middle>615 </TD>
<TD align=middle></TD>
<TD align=middle><font color = "blue">15</font></TD>
<TD align=middle><font color = "blue">41</font></TD>
<TD align=middle><font color = "blue">P000000D</font></TD>
<TD align=middle></TD>
<TD align=middle></TD>
<TD align=middle></TD>
<TD align=middle></TD>
<TD align=middle></TD></TR></TBODY></TABLE><BR>';

// Approach 1: collect the inner text of every <font color="blue"> element.
// Modifiers: i = case-insensitive tags, U = quantifiers are lazy by default,
// u = pattern and subject are treated as UTF-8, s = "." also matches newlines.
// Only tags whose first attribute is color match, so the barcode <FONT size=+3 ...>
// element is not captured.
$result = array();
preg_match_all('#<font\s*color\s*=\s*"blue">(.*)</font>#iUus',$string,$result);
print_r($result[1]);

// Approach 2: flatten every <td>...</td> cell into a list of plain-text values.
// The subject has to be $string (the markup defined above); the previous code
// passed an undefined variable here, which left $r empty.
// Because the match is lazy, an outer <TD> that wraps a nested table is paired
// with the first inner </TD>, which yields an empty entry for that wrapper cell.
preg_match_all('#<td.+/td>#isU', $string, $r);
// Drop the markup from each matched cell, then trim surrounding whitespace.
$r = array_map('trim', array_map('strip_tags', $r[0]));
print_r($r);
读取某项内容不是什么难事吧?
Array
(
[0] =>
[1] => aaaaaa aaaaaa
[2] => aaaaaa xxxx(aaaaaa)
[3] => aaaaaa xxxx
[4] => adress
aaaaaa adress
[5] =>
[6] => 交货计划单
[7] => 计划到达时间 2013-09-16
[8] => PUS编号
770266110 版本00
[9] => Customer 客户
[10] => *DYNP-770266110-00*
[11] =>
[12] => Delivery Information
交货信息
[13] => 工厂Plant
[14] => xxxxxx
[15] => 取货时间Pick Up
Time
[16] => 2013-09-09 16:30
[17] => 需要供应商反馈Need Duns
Response
[18] => N
[19] => 交货日期Delivery
Date
[20] => 2013-09-16
[21] => 窗口时间Window
Time
[22] => 16:30
[23] => 卸货口Dock
[24] => CC-70D
[25] => 卸货口负责人Dock Incharger
[26] => kkk
[27] => 卸货口电话Dock Tel
[28] => 011-1111
[29] => 卸货口地址Dock Address
[30] => adress
[31] => 交货地点Delivery
Place
[32] =>
[33] => 计划跟踪员Follow Up
[34] => kkkk
[35] => 计划跟踪员电话/传真FollowUp
Tel/Fax
[36] => 011-1111
[37] => 交货说明Delivery Note
[38] =>
[39] => Part Information 零件清单
[40] => 序号
[41] => 零件号
[42] => 零件说明
[43] => 需求数量
[44] => 承诺数量
[45] => 实收数量
[46] => 包装数
[47] => 料箱数
[48] => 料箱号
[49] => 实发料箱号
[50] => 实发料箱数
[51] => 实收料箱号
[52] => 实收料箱数
[53] => 备注
[54] => 1
[55] => 12647212
[56] =>
[57] => 60
[58] => 60
[59] =>
[60] => 15
[61] => 4
[62] => P000000D
[63] =>
[64] =>
[65] =>
[66] =>
[67] =>
[68] => 2
[69] => 12654172
[70] =>
[71] => 615
[72] => 615
[73] =>
[74] => 15
[75] => 41
[76] => P000000D
[77] =>
[78] =>
[79] =>
[80] =>
[81] =>
)
// The part list's header row starts at index 40 of $r, and every row has 14 cells.
$t = array_chunk(array_slice($r, 40), 14);
// Start from an empty list so print_r() below is well-defined even when the
// table contains a header row but no data rows.
$res = array();
// $t[0] holds the column captions; every later chunk is one data row.
foreach (array_slice($t, 1) as $row) {
    // Key the row's cells by their column captions.
    $res[] = array_combine($t[0], $row);
}
print_r($res);
Array
(
[0] => Array
(
[序号] => 1
[零件号] => 12647212
[零件说明] =>
[需求数量] => 60
[承诺数量] => 60
[实收数量] =>
[包装数] => 15
[料箱数] => 4
[料箱号] => P000000D
[实发料箱号] =>
[实发料箱数] =>
[实收料箱号] =>
[实收料箱数] =>
[备注] =>
)
[1] => Array
(
[序号] => 2
[零件号] => 12654172
[零件说明] =>
[需求数量] => 615
[承诺数量] => 615
[实收数量] =>
[包装数] => 15
[料箱数] => 41
[料箱号] => P000000D
[实发料箱号] =>
[实发料箱数] =>
[实收料箱号] =>
[实收料箱数] =>
[备注] =>
)
)