使用 Apache POI 读取 Word 文档以及 Word 文档中的表格
读取 Word 2003 文本:
public String readDoc(File docFile) {String text2003 = null;try {// word 2003: 图片不会被读取InputStream is = new FileInputStream(docFile);WordExtractor ex = new WordExtractor(is);text2003 = ex.getText();System.out.println(text2003);is.close();} catch (IOException e) {e.printStackTrace();}return text2003;}
读取 Word 2003 中的表格内容:
public String readExcelInWord(File file) {String text2003 = null;try {FileInputStream in = new FileInputStream(file);// 载入文档POIFSFileSystem pfs = new POIFSFileSystem(in);HWPFDocument hwpf = new HWPFDocument(pfs);Range range = hwpf.getRange();// 得到文档的读取范围TableIterator it = new TableIterator(range);// 迭代文档中的表格while (it.hasNext()) {Table tb = (Table) it.next();// 迭代行,默认从0开始for (int i = 0; i < tb.numRows(); i++) {TableRow tr = tb.getRow(i);// 迭代列,默认从0开始for (int j = 0; j < tr.numCells(); j++) {TableCell td = tr.getCell(j);// 取得单元格// 循环单元格的段落for (int k = 0; k < td.numParagraphs(); k++) {Paragraph para = td.getParagraph(k);String s = para.text().trim();System.out.println(s);} } // end for cells} // end for rows} // end while} catch (Exception e) {e.printStackTrace();}return text2003;}
Apache POI 下载地址: http://poi.apache.org/download.html