首页 诗词 字典 板报 句子 名言 友答 励志 学校 网站地图
当前位置: 首页 > 教程频道 > 软件管理 > PowerDesigner >

pdf资料转文本

2012-06-30 
pdf文件转文本：import java.awt.geom.AffineTransform; import java.util.Iterator; import de.intarsys.pdf.……

pdf文件转文本

import java.awt.geom.AffineTransform;import java.util.Iterator;import de.intarsys.pdf.content.CSDeviceBasedInterpreter;import de.intarsys.pdf.content.CSException;import de.intarsys.pdf.content.text.CSTextExtractor;import de.intarsys.pdf.pd.PDDocument;import de.intarsys.pdf.pd.PDPage;import de.intarsys.pdf.pd.PDPageNode;import de.intarsys.pdf.pd.PDPageTree;import de.intarsys.pdf.tools.kernel.PDFGeometryTools;import de.intarsys.tools.locator.FileLocator;public class Pdf2Text {protected static void extractText(PDPageTree pageTree, StringBuilder sb) {for (Iterator it = pageTree.getKids().iterator(); it.hasNext();) {PDPageNode node = (PDPageNode) it.next();if (node.isPage()) {try {CSTextExtractor extractor = new CSTextExtractor();PDPage page = (PDPage) node;AffineTransform pageTx = new AffineTransform();PDFGeometryTools.adjustTransform(pageTx, page);extractor.setDeviceTransform(pageTx);CSDeviceBasedInterpreter interpreter = new CSDeviceBasedInterpreter(null, extractor);interpreter.process(page.getContentStream(), page.getResources());sb.append(extractor.getContent());} catch (CSException e) {e.printStackTrace();}} else {extractText((PDPageTree) node, sb);}}}public static void main(String[] args) {FileLocator locator = new FileLocator("D:\\adobe\\iText中文教程.pdf");PDDocument doc = null;try {doc = PDDocument.createFromLocator(locator);} catch (Exception e) {e.printStackTrace();}StringBuilder sb = new StringBuilder();extractText(doc.getPageTree(), sb);System.out.println(sb);}}
?

热点排行