PDF处理两例
1,PDF生成图片
java -cp pdfbox-app-1.5.0.jar org.apache.pdfbox.ExtractImages 8086.pdf
2,PDF取得文本
public String getTextFromPDF(String pdfFilePath) {
String result = null;
FileInputStream is = null;
PDDocument document = null;
try {
is = new FileInputStream(pdfFilePath);
PDFParser parser = new PDFParser(is);
parser.parse();
document = parser.getPDDocument();
PDFTextStripper stripper = new PDFTextStripper();
result = stripper.getText(document);
} catch (FileNotFoundException e) {
e.printStackTrace();
} catch (IOException e) {
e.printStackTrace();
} finally {
if (is != null) {
try {
is.close();
} catch (IOException e) {
// TODO Auto-generated catch block
e.printStackTrace();
}
}
if (document != null) {
try {
document.close();
} catch (IOException e) {
e.printStackTrace();
}
}
}
return result;
}