Java源码示例:org.apache.poi.hwpf.converter.WordToHtmlConverter

示例1
/**
 * Converts a binary Word document read from {@code input} into an HTML string.
 *
 * <p>The input stream is closed before this method returns, on success and on failure.
 *
 * @param input   stream supplying the Word document; closed by this method
 * @param charset name of the encoding used to serialize the HTML and to decode it
 *                back into the returned string
 * @return the serialized HTML markup
 * @throws Exception if the document cannot be read, converted, or serialized, or if
 *                   {@code charset} is not a supported encoding
 */
public static String parseDoc2Html(InputStream input, String charset) throws Exception {
  try {
    HWPFDocument wordDocument = new HWPFDocument(input);
    Document doc = DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument();
    WordToHtmlConverter converter = new WordToHtmlConverter(doc);
    converter.processDocument(wordDocument);

    ByteArrayOutputStream output = new ByteArrayOutputStream();
    Transformer serializer = TransformerFactory.newInstance().newTransformer();
    serializer.setOutputProperty(OutputKeys.ENCODING, charset);
    serializer.setOutputProperty(OutputKeys.INDENT, "yes");
    serializer.setOutputProperty(OutputKeys.METHOD, "html");
    serializer.transform(new DOMSource(converter.getDocument()), new StreamResult(output));

    // Decode with the same charset the serializer wrote. Using the platform default
    // here garbles non-ASCII text whenever the two encodings differ.
    return output.toString(charset);
  } finally {
    // Close the caller's stream on every path, including failures while parsing.
    if (input != null) {
      input.close();
    }
  }
}
 
示例2
/**
 * Converts a Word file to HTML and stores the result in a file.
 *
 * <p>The document is serialized as UTF-8 HTML, decoded to a string, and handed to
 * {@code stringToFile} together with the target path. Any exception raised along the
 * way is caught and its stack trace printed; nothing is propagated to the caller.
 *
 * @param docfile
 *            full path of the Word file to read
 * @param htmlfile
 *            path where the converted HTML is stored
 * added by duanql	2013-07-17
 */
public void WordConverterHtml(String docfile, String htmlfile){
	try {
		String content;
		InputStream input = new FileInputStream(docfile);
		try {
			HWPFDocument wordDocument = new HWPFDocument(input);
			WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
					DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
			wordToHtmlConverter.processDocument(wordDocument);
			Document htmlDocument = wordToHtmlConverter.getDocument();

			ByteArrayOutputStream outStream = new ByteArrayOutputStream();
			Transformer serializer = TransformerFactory.newInstance().newTransformer();
			serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
			serializer.setOutputProperty(OutputKeys.INDENT, "yes");
			serializer.setOutputProperty(OutputKeys.METHOD, "html");
			serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));

			// Decode with the same encoding the serializer was told to write.
			content = new String(outStream.toByteArray(), "UTF-8");
		} finally {
			// Release the source file handle on every path so it is not leaked.
			input.close();
		}
		stringToFile(content,htmlfile);
	} catch (Exception e) {
		e.printStackTrace();
	}
}
 
示例3
/**
 * Converts a Word file to HTML and stores the result in a file.
 *
 * <p>The document is serialized as UTF-8 HTML, decoded to a string, and handed to
 * {@code stringToFile} together with the target path. Any exception raised along the
 * way is caught and its stack trace printed; nothing is propagated to the caller.
 *
 * @param docfile
 *            full path of the Word file to read
 * @param htmlfile
 *            path where the converted HTML is stored
 * added by duanql	2013-07-17
 */
public void WordConverterHtml(String docfile, String htmlfile){
	try {
		String content;
		InputStream input = new FileInputStream(docfile);
		try {
			HWPFDocument wordDocument = new HWPFDocument(input);
			WordToHtmlConverter wordToHtmlConverter = new WordToHtmlConverter(
					DocumentBuilderFactory.newInstance().newDocumentBuilder().newDocument());
			wordToHtmlConverter.processDocument(wordDocument);
			Document htmlDocument = wordToHtmlConverter.getDocument();

			ByteArrayOutputStream outStream = new ByteArrayOutputStream();
			Transformer serializer = TransformerFactory.newInstance().newTransformer();
			serializer.setOutputProperty(OutputKeys.ENCODING, "UTF-8");
			serializer.setOutputProperty(OutputKeys.INDENT, "yes");
			serializer.setOutputProperty(OutputKeys.METHOD, "html");
			serializer.transform(new DOMSource(htmlDocument), new StreamResult(outStream));

			// Decode with the same encoding the serializer was told to write.
			content = new String(outStream.toByteArray(), "UTF-8");
		} finally {
			// Release the source file handle on every path so it is not leaked.
			input.close();
		}
		stringToFile(content,htmlfile);
	} catch (Exception e) {
		e.printStackTrace();
	}
}