Java源码示例:org.apache.poi.hwpf.converter.WordToHtmlConverter
示例1
/**
 * Converts a Word document read from {@code input} into an HTML string.
 *
 * <p>The document is loaded with {@code HWPFDocument}, rendered into a DOM by
 * {@code WordToHtmlConverter}, and serialized as indented HTML. The serialized
 * bytes are decoded with the same {@code charset} they were written with, so the
 * result does not depend on the platform default charset.
 *
 * @param input   stream supplying the Word document; closed by this method
 * @param charset name of the charset used both to serialize and to decode the HTML
 * @return the generated HTML markup
 * @throws Exception if loading, converting, or serializing the document fails, or
 *                   if {@code charset} is not a supported charset name
 */
public static String parseDoc2Html(InputStream input, String charset) throws Exception {
    try {
        HWPFDocument wordDocument = new HWPFDocument(input);
        Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
        WordToHtmlConverter converter = new WordToHtmlConverter(doc);
        converter.processDocument(wordDocument);

        // In-memory sink: closing a ByteArrayOutputStream has no effect, so it is not closed.
        ByteArrayOutputStream output = new ByteArrayOutputStream();
        Transformer serializer = TransformerFactory.newInstance().newTransformer();
        serializer.setOutputProperty(OutputKeys.ENCODING, charset);
        serializer.setOutputProperty(OutputKeys.INDENT, "yes");
        serializer.setOutputProperty(OutputKeys.METHOD, "html");
        serializer.transform(new DOMSource(converter.getDocument()), new StreamResult(output));

        // Decode with the charset used for serialization; decoding with the platform
        // default garbles the text whenever the two charsets differ.
        return output.toString(charset);
    } finally {
        // Release the input even when loading or conversion fails part-way through.
        if (input != null) {
            input.close();
        }
    }
}
示例2
/**
 * Converts a Word file to HTML and hands the result to {@code stringToFile}.
 *
 * <p>The document is loaded with {@code HWPFDocument}, rendered into a DOM by
 * {@code WordToHtmlConverter}, serialized as indented UTF-8 HTML, and the resulting
 * text is passed to {@code stringToFile} together with {@code htmlfile}.
 *
 * <p>This method does not propagate failures: any exception raised during the
 * conversion is caught and its stack trace is printed.
 *
 * @param docfile  full path of the Word file to read
 * @param htmlfile path where the converted HTML is to be stored
 *                 (forwarded unchanged to {@code stringToFile})
 *
 * Added by duanql, 2013-07-17.
 */
public void WordConverterHtml(String docfile, String htmlfile){
    try {
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        // try-with-resources guarantees the file handle is released even when
        // loading or conversion throws.
        try (InputStream input = new FileInputStream(docfile)) {
            HWPFDocument wordDocument = new HWPFDocument(input);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            wordToHtmlConverter.processDocument(wordDocument);
            Document htmlDocument = wordToHtmlConverter.getDocument();

            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
        }
        // Decode with the same charset the serializer was told to write.
        String content = outStream.toString("UTF-8");
        stringToFile(content,htmlfile);
    } catch (Exception e) {
        // Best-effort conversion: report the failure and return normally.
        e.printStackTrace();
    }
}
示例3
/**
 * Converts a Word file to HTML and hands the result to {@code stringToFile}.
 *
 * <p>The document is loaded with {@code HWPFDocument}, rendered into a DOM by
 * {@code WordToHtmlConverter}, serialized as indented UTF-8 HTML, and the resulting
 * text is passed to {@code stringToFile} together with {@code htmlfile}.
 *
 * <p>This method does not propagate failures: any exception raised during the
 * conversion is caught and its stack trace is printed.
 *
 * @param docfile  full path of the Word file to read
 * @param htmlfile path where the converted HTML is to be stored
 *                 (forwarded unchanged to {@code stringToFile})
 *
 * Added by duanql, 2013-07-17.
 */
public void WordConverterHtml(String docfile, String htmlfile){
    try {
        ByteArrayOutputStream outStream = new ByteArrayOutputStream();
        // try-with-resources guarantees the file handle is released even when
        // loading or conversion throws.
        try (InputStream input = new FileInputStream(docfile)) {
            HWPFDocument wordDocument = new HWPFDocument(input);
            WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
                    DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
            wordToHtmlConverter.processDocument(wordDocument);
            Document htmlDocument = wordToHtmlConverter.getDocument();

            Transformer serializer = TransformerFactory.newInstance().newTransformer();
            serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
            serializer.setOutputProperty(OutputKeys.INDENT, "yes");
            serializer.setOutputProperty(OutputKeys.METHOD, "html");
            serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));
        }
        // Decode with the same charset the serializer was told to write.
        String content = outStream.toString("UTF-8");
        stringToFile(content,htmlfile);
    } catch (Exception e) {
        // Best-effort conversion: report the failure and return normally.
        e.printStackTrace();
    }
}