|
@@ -0,0 +1,147 @@
|
|
|
+package com.qmth.cqb.utils;
|
|
|
+
|
|
|
+import java.io.ByteArrayOutputStream;
|
|
|
+import java.io.FileOutputStream;
|
|
|
+import java.io.OutputStream;
|
|
|
+import org.docx4j.Docx4J;
|
|
|
+import org.docx4j.Docx4jProperties;
|
|
|
+import org.docx4j.convert.out.ConversionFeatures;
|
|
|
+import org.docx4j.convert.out.HTMLSettings;
|
|
|
+import org.docx4j.convert.out.html.SdtToListSdtTagHandler;
|
|
|
+import org.docx4j.convert.out.html.SdtWriter;
|
|
|
+import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
|
|
|
+import org.docx4j.samples.AbstractSample;
|
|
|
+
|
|
|
+
|
|
|
+public class ConvertOutHtml extends AbstractSample {
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ static {
|
|
|
+
|
|
|
+ inputfilepath = System.getProperty("user.dir") + "/test/test.docx";
|
|
|
+
|
|
|
+ save = false;
|
|
|
+ nestLists = false;
|
|
|
+ }
|
|
|
+
|
|
|
+ static boolean save;
|
|
|
+ static boolean nestLists;
|
|
|
+
|
|
|
+ public static void main(String[] args)
|
|
|
+ throws Exception {
|
|
|
+
|
|
|
+ try {
|
|
|
+ getInputFilePath(args);
|
|
|
+ } catch (IllegalArgumentException e) {
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ WordprocessingMLPackage wordMLPackage;
|
|
|
+ if (inputfilepath==null) {
|
|
|
+
|
|
|
+ System.out.println("No imput path passed, creating dummy document");
|
|
|
+ wordMLPackage = WordprocessingMLPackage.createPackage();
|
|
|
+
|
|
|
+ } else {
|
|
|
+ System.out.println("Loading file from " + inputfilepath);
|
|
|
+ wordMLPackage = Docx4J.load(new java.io.File(inputfilepath));
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ HTMLSettings htmlSettings = Docx4J.createHTMLSettings();
|
|
|
+
|
|
|
+ htmlSettings.setImageDirPath(inputfilepath + "_files");
|
|
|
+ htmlSettings.setImageTargetUri(inputfilepath.substring(inputfilepath.lastIndexOf("/")+1)
|
|
|
+ + "_files");
|
|
|
+ htmlSettings.setWmlPackage(wordMLPackage);
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ *
|
|
|
+ * motivated by vertical space in tables in Firefox and Google Chrome.
|
|
|
+
|
|
|
+ If you have unwanted vertical space, in Chrome this may be coming from -webkit-margin-before and -webkit-margin-after
|
|
|
+ (in Firefox, margin-top is set to 1em in html.css)
|
|
|
+
|
|
|
+ Setting margin: 0 on p is enough to fix it.
|
|
|
+
|
|
|
+ See further http:
|
|
|
+ */
|
|
|
+ String userCSS = null;
|
|
|
+ if (nestLists) {
|
|
|
+
|
|
|
+ userCSS = "html, body, div, span, h1, h2, h3, h4, h5, h6, p, a, img, table, caption, tbody, tfoot, thead, tr, th, td " +
|
|
|
+ "{ margin: 0; padding: 0; border: 0;}" +
|
|
|
+ "body {line-height: 1;} ";
|
|
|
+ } else {
|
|
|
+ userCSS = "html, body, div, span, h1, h2, h3, h4, h5, h6, p, a, img, ol, ul, li, table, caption, tbody, tfoot, thead, tr, th, td " +
|
|
|
+ "{ margin: 0; padding: 0; border: 0;}" +
|
|
|
+ "body {line-height: 1;} ";
|
|
|
+
|
|
|
+ }
|
|
|
+ htmlSettings.setUserCSS(userCSS);
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ if (nestLists) {
|
|
|
+ SdtWriter.registerTagHandler("HTML_ELEMENT", new SdtToListSdtTagHandler());
|
|
|
+ } else {
|
|
|
+ htmlSettings.getFeatures().remove(ConversionFeatures.PP_HTML_COLLECT_LISTS);
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ OutputStream os;
|
|
|
+ if (save) {
|
|
|
+ os = new FileOutputStream(inputfilepath + ".html");
|
|
|
+ } else {
|
|
|
+ os = new ByteArrayOutputStream();
|
|
|
+
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ Docx4jProperties.setProperty("docx4j.Convert.Out.HTML.OutputMethodXML", true);
|
|
|
+
|
|
|
+
|
|
|
+ Docx4J.toHTML(htmlSettings, os, Docx4J.FLAG_NONE);
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+
|
|
|
+ if (save) {
|
|
|
+ System.out.println("Saved: " + inputfilepath + ".html ");
|
|
|
+ } else {
|
|
|
+
|
|
|
+ String originalHtml = ((ByteArrayOutputStream)os).toString();
|
|
|
+
|
|
|
+ originalHtml = originalHtml.substring(originalHtml.indexOf("<body>") + "<body>".length(),originalHtml.indexOf("</body"));
|
|
|
+ originalHtml = originalHtml.substring(originalHtml.indexOf("<p"),originalHtml.lastIndexOf("</span>"))+"</span></p>";
|
|
|
+ String[] trr = originalHtml.split("\\[.*试题分类.*\\]:");
|
|
|
+ }
|
|
|
+
|
|
|
+
|
|
|
+ if (wordMLPackage.getMainDocumentPart().getFontTablePart()!=null) {
|
|
|
+ wordMLPackage.getMainDocumentPart().getFontTablePart().deleteEmbeddedFontTempFiles();
|
|
|
+ }
|
|
|
+
|
|
|
+ htmlSettings = null;
|
|
|
+ wordMLPackage = null;
|
|
|
+ }
|
|
|
+}
|