|
@@ -1,147 +0,0 @@
|
|
-package com.qmth.cqb.utils;
|
|
|
|
-
|
|
|
|
-import java.io.ByteArrayOutputStream;
|
|
|
|
-import java.io.FileOutputStream;
|
|
|
|
-import java.io.OutputStream;
|
|
|
|
-import org.docx4j.Docx4J;
|
|
|
|
-import org.docx4j.Docx4jProperties;
|
|
|
|
-import org.docx4j.convert.out.ConversionFeatures;
|
|
|
|
-import org.docx4j.convert.out.HTMLSettings;
|
|
|
|
-import org.docx4j.convert.out.html.SdtToListSdtTagHandler;
|
|
|
|
-import org.docx4j.convert.out.html.SdtWriter;
|
|
|
|
-import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
|
|
|
|
-import org.docx4j.samples.AbstractSample;
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-public class ConvertOutHtml extends AbstractSample {
|
|
|
|
-
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- // Config for non-command line version
|
|
|
|
- static {
|
|
|
|
-
|
|
|
|
- inputfilepath = System.getProperty("user.dir") + "/test/test.docx";
|
|
|
|
-
|
|
|
|
- save = false;
|
|
|
|
- nestLists = false;
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- static boolean save;
|
|
|
|
- static boolean nestLists;
|
|
|
|
-
|
|
|
|
- public static void main(String[] args)
|
|
|
|
- throws Exception {
|
|
|
|
-
|
|
|
|
- try {
|
|
|
|
- getInputFilePath(args);
|
|
|
|
- } catch (IllegalArgumentException e) {
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // Document loading (required)
|
|
|
|
- WordprocessingMLPackage wordMLPackage;
|
|
|
|
- if (inputfilepath==null) {
|
|
|
|
- // Create a docx
|
|
|
|
- System.out.println("No imput path passed, creating dummy document");
|
|
|
|
- wordMLPackage = WordprocessingMLPackage.createPackage();
|
|
|
|
- //SampleDocument.createContent(wordMLPackage.getMainDocumentPart());
|
|
|
|
- } else {
|
|
|
|
- System.out.println("Loading file from " + inputfilepath);
|
|
|
|
- wordMLPackage = Docx4J.load(new java.io.File(inputfilepath));
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // HTML exporter setup (required)
|
|
|
|
- // .. the HTMLSettings object
|
|
|
|
- HTMLSettings htmlSettings = Docx4J.createHTMLSettings();
|
|
|
|
-
|
|
|
|
- htmlSettings.setImageDirPath(inputfilepath + "_files");
|
|
|
|
- htmlSettings.setImageTargetUri(inputfilepath.substring(inputfilepath.lastIndexOf("/")+1)
|
|
|
|
- + "_files");
|
|
|
|
- htmlSettings.setWmlPackage(wordMLPackage);
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- /* CSS reset, see http://itumbcom.blogspot.com.au/2013/06/css-reset-how-complex-it-should-be.html
|
|
|
|
- *
|
|
|
|
- * motivated by vertical space in tables in Firefox and Google Chrome.
|
|
|
|
-
|
|
|
|
- If you have unwanted vertical space, in Chrome this may be coming from -webkit-margin-before and -webkit-margin-after
|
|
|
|
- (in Firefox, margin-top is set to 1em in html.css)
|
|
|
|
-
|
|
|
|
- Setting margin: 0 on p is enough to fix it.
|
|
|
|
-
|
|
|
|
- See further http://www.css-101.org/articles/base-styles-sheet-for-webkit-based-browsers/
|
|
|
|
- */
|
|
|
|
- String userCSS = null;
|
|
|
|
- if (nestLists) {
|
|
|
|
- // use browser defaults for ol, ul, li
|
|
|
|
- userCSS = "html, body, div, span, h1, h2, h3, h4, h5, h6, p, a, img, table, caption, tbody, tfoot, thead, tr, th, td " +
|
|
|
|
- "{ margin: 0; padding: 0; border: 0;}" +
|
|
|
|
- "body {line-height: 1;} ";
|
|
|
|
- } else {
|
|
|
|
- userCSS = "html, body, div, span, h1, h2, h3, h4, h5, h6, p, a, img, ol, ul, li, table, caption, tbody, tfoot, thead, tr, th, td " +
|
|
|
|
- "{ margin: 0; padding: 0; border: 0;}" +
|
|
|
|
- "body {line-height: 1;} ";
|
|
|
|
-
|
|
|
|
- }
|
|
|
|
- htmlSettings.setUserCSS(userCSS);
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- //Other settings (optional)
|
|
|
|
-// htmlSettings.setUserBodyTop("<H1>TOP!</H1>");
|
|
|
|
-// htmlSettings.setUserBodyTail("<H1>TAIL!</H1>");
|
|
|
|
-
|
|
|
|
- // Sample sdt tag handler (tag handlers insert specific
|
|
|
|
- // html depending on the contents of an sdt's tag).
|
|
|
|
- // This will only have an effect if the sdt tag contains
|
|
|
|
- // the string @class=XXX
|
|
|
|
-// SdtWriter.registerTagHandler("@class", new TagClass() );
|
|
|
|
-
|
|
|
|
-// SdtWriter.registerTagHandler(Containerization.TAG_BORDERS, new TagSingleBox() );
|
|
|
|
-// SdtWriter.registerTagHandler(Containerization.TAG_SHADING, new TagSingleBox() );
|
|
|
|
-
|
|
|
|
-
|
|
|
|
- // list numbering: depending on whether you want list numbering hardcoded, or done using <li>.
|
|
|
|
- if (nestLists) {
|
|
|
|
- SdtWriter.registerTagHandler("HTML_ELEMENT", new SdtToListSdtTagHandler());
|
|
|
|
- } else {
|
|
|
|
- htmlSettings.getFeatures().remove(ConversionFeatures.PP_HTML_COLLECT_LISTS);
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // output to an OutputStream.
|
|
|
|
- OutputStream os;
|
|
|
|
- if (save) {
|
|
|
|
- os = new FileOutputStream(inputfilepath + ".html");
|
|
|
|
- } else {
|
|
|
|
- os = new ByteArrayOutputStream();
|
|
|
|
-
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // If you want XHTML output
|
|
|
|
- Docx4jProperties.setProperty("docx4j.Convert.Out.HTML.OutputMethodXML", true);
|
|
|
|
-
|
|
|
|
- //Don't care what type of exporter you use
|
|
|
|
- Docx4J.toHTML(htmlSettings, os, Docx4J.FLAG_NONE);
|
|
|
|
- //Prefer the exporter, that uses a xsl transformation
|
|
|
|
- //Docx4J.toHTML(htmlSettings, os, Docx4J.FLAG_EXPORT_PREFER_XSL);
|
|
|
|
- //Prefer the exporter, that doesn't use a xsl transformation (= uses a visitor)
|
|
|
|
-// Docx4J.toHTML(htmlSettings, os, Docx4J.FLAG_EXPORT_PREFER_NONXSL);
|
|
|
|
- //String originalHtml = new String(os.toString(), "UTF-8");
|
|
|
|
- if (save) {
|
|
|
|
- System.out.println("Saved: " + inputfilepath + ".html ");
|
|
|
|
- } else {
|
|
|
|
- //System.out.println( ((ByteArrayOutputStream)os).toString() );
|
|
|
|
- String originalHtml = ((ByteArrayOutputStream)os).toString();
|
|
|
|
- //System.out.println(CommonUtils.extractText(html));
|
|
|
|
- originalHtml = originalHtml.substring(originalHtml.indexOf("<body>") + "<body>".length(),originalHtml.indexOf("</body"));
|
|
|
|
- originalHtml = originalHtml.substring(originalHtml.indexOf("<p"),originalHtml.lastIndexOf("</span>"))+"</span></p>";
|
|
|
|
- String[] trr = originalHtml.split("\\[.*试题分类.*\\]:");
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
- // Clean up, so any ObfuscatedFontPart temp files can be deleted
|
|
|
|
- if (wordMLPackage.getMainDocumentPart().getFontTablePart()!=null) {
|
|
|
|
- wordMLPackage.getMainDocumentPart().getFontTablePart().deleteEmbeddedFontTempFiles();
|
|
|
|
- }
|
|
|
|
- // This would also do it, via finalize() methods
|
|
|
|
- htmlSettings = null;
|
|
|
|
- wordMLPackage = null;
|
|
|
|
- }
|
|
|
|
-}
|
|
|