/**
 * Manual smoke test: opens a legacy Word (.doc) file from a hard-coded path
 * and prints the text of every paragraph in the document range to standard
 * output, each followed by a separator line.
 *
 * NOTE(review): the path is an absolute location on a local drive, so this
 * test presumably only passes on a machine where that file exists.
 *
 * @throws IOException if the file cannot be opened or parsed
 */
@Test
public void testDoc() throws IOException {
    // try-with-resources releases the file handle even when parsing fails.
    try (FileInputStream in = new FileInputStream("D:\\Xiaoi\\Items\\2019-07-02 合同智能分析工具\\04_现场数据\\4.25国网北京信通公司110kV半壁店站等63个站点通信蓄电池改造勘察设计合同.doc")) {
        HWPFDocument document = new HWPFDocument(in);

        // Disabled alternative: dump the first paragraph of every field, per document part.
        // for ( FieldsDocumentPart part : FieldsDocumentPart.values() ) {
        //     System.out.println( "=== Document part: " + part + " ===" );
        //     for ( Field field : document.getFields().getFields( part ) ) {
        //         System.out.println(field.firstSubrange(document.getRange()).getParagraph(0).text());
        //     }
        // }

        // Resolve the range and its paragraph count once instead of on every iteration.
        Range range = document.getRange();
        int paragraphCount = range.numParagraphs();
        for (int i = 0; i < paragraphCount; i++) {
            Paragraph paragraph = range.getParagraph(i);
            System.out.println(paragraph.text());
            System.out.println("===============================================");
        }

        // Disabled alternative: dump only the fields of the main document part.
        // for (Field field : document.getFields().getFields(FieldsDocumentPart.MAIN)){
        //     System.out.println(field);
        //     System.out.println(field.firstSubrange(document.getRange()).getParagraph(0).text());
        // }
        // System.out.println(document.getRange().getParagraph(0).text());
        // System.out.println(document.getMainTextboxRange().getSection(0).text());
    }
}
/**
 * Returns the text of the given Word document as an array holding one
 * String per paragraph.
 *
 * Any paragraph text ending in a carriage return gets a line feed appended.
 * If walking the paragraphs throws, the result is instead a single-element
 * array containing whatever {@code getWordTextFromPieces(doc)} returns.
 *
 * @param doc the document to read paragraphs from
 * @return the paragraph texts, or a one-element array with the fallback text
 */
public static String[] getWordParagraphText(HWPFDocument doc) {
    try {
        // Preferred route: go through the paragraph model.
        final Range range = doc.getRange();
        final String[] paragraphs = new String[range.numParagraphs()];
        int index = 0;
        while (index < paragraphs.length) {
            final String text = range.getParagraph(index).text();
            // Complete a trailing "\r" into "\r\n".
            paragraphs[index] = text.endsWith("\r") ? text + "\n" : text;
            index++;
        }
        return paragraphs;
    } catch (Exception e) {
        // Turning the text pieces into paragraphs failed;
        // fall back to the raw text pieces as one big string.
        return new String[] { getWordTextFromPieces(doc) };
    }
}