Bläddra i källkod

修复html转word的bug

宋悦 7 år sedan
förälder
incheckning
3a260c386f

+ 40 - 9
cqb-comm-utils/src/main/java/com/qmth/cqb/utils/CommonUtils.java

@@ -1,8 +1,6 @@
 package com.qmth.cqb.utils;
 
-import java.io.BufferedReader;
-import java.io.InputStream;
-import java.io.InputStreamReader;
+import java.io.*;
 import java.math.BigDecimal;
 import java.math.RoundingMode;
 import java.text.SimpleDateFormat;
@@ -15,6 +13,7 @@ import java.util.regex.Matcher;
 import java.util.regex.Pattern;
 import java.util.stream.Stream;
 
+import com.qmth.cqb.utils.word.DocxProcessUtil;
 import org.apache.commons.lang3.StringEscapeUtils;
 import org.apache.commons.lang3.StringUtils;
 import org.dom4j.Attribute;
@@ -24,6 +23,8 @@ import org.dom4j.DocumentHelper;
 import org.dom4j.Element;
 
 import cn.com.qmth.examcloud.common.dto.question.enums.QuesStructType;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
 
 /**
  * Created by songyue on 16/12/27.
@@ -39,6 +40,8 @@ public final class CommonUtils {
     public static final String PAPER_TITLE="中国石油大学";
     public static final String PAPER_SUB_TITLE="网络教育";
 
+    private static final Logger log = LoggerFactory.getLogger(CommonUtils.class);
+
     /**
      * 加载properties配置文件
      * 
@@ -53,7 +56,7 @@ public final class CommonUtils {
             BufferedReader bf = new BufferedReader(new InputStreamReader(inputStream));
             properties.load(bf);
         } catch (Exception e) {
-            e.printStackTrace();
+            log.error("加载配置异常:",e.getMessage());
         }
         return properties;
     }
@@ -272,11 +275,22 @@ public final class CommonUtils {
 		return idValues;
     }
 
+    /**
+     * 保留两位小数
+     * @param number
+     * @return
+     */
     public static double formatDouble(double number){
         // valueOf() goes through Double.toString, so 1.005 rounds HALF_UP to 1.01 rather than 1.0.
         return BigDecimal.valueOf(number).setScale(2, RoundingMode.HALF_UP).doubleValue();
     }
 
+    /**
+     * 补全html标签
+     * @param htmlStr
+     * @return
+     * @throws Exception
+     */
     public static String repairHtmlStr(String htmlStr)throws Exception{
         htmlStr = htmlStr.trim();
         if(htmlStr.toLowerCase().contains("<!doctype html ")){
@@ -320,7 +334,8 @@ public final class CommonUtils {
             }
         }
         //添加body标签
-        if(!htmlStr.toLowerCase().contains("<body")){
+        if((htmlStr.toLowerCase().contains("<p") || htmlStr.toLowerCase().contains("<span"))
+                && !htmlStr.toLowerCase().contains("<body")){
             htmlStr = "<body>"+htmlStr+"</body>";
         }
         return new String(htmlStr.getBytes("UTF-8"));
@@ -357,6 +372,12 @@ public final class CommonUtils {
         }
     }
 
+    /**
+     * 格式化html
+     * @param htmlStr
+     * @return
+     * @throws Exception
+     */
     public static String formatHtml(String htmlStr)throws Exception{
         if(StringUtils.isEmpty(htmlStr)){
             return "";
@@ -365,13 +386,23 @@ public final class CommonUtils {
         htmlStr = StringEscapeUtils.unescapeHtml4(htmlStr);
         return htmlStr;
     }
-    
 
-    public static void main(String[] args) {
+    /**
+     * Removes every tab, carriage-return and form-feed character; spaces and line feeds are kept.
+     * @param str text to clean, may be null
+     * @return the cleaned text, or "" when str is null or empty
+     */
+    public static String trimNoBlankSpace(final String str){
+        if(str == null || str.length() == 0){
+            return "";
+        }
+        return str.replaceAll("[\\t\\r\\f]*","");
+    }
+
+    public static void main(String[] args) throws Exception{
         // QuesStructType quesStructType = getEnum(QuesStructType.class,"单选");
         // System.out.println(quesStructType.getName());
         // System.out.println(characterToNumber("A"));
-        System.out.println(toCHNum(111111));
-
+        System.out.println("");
     }
 }

+ 6 - 88
cqb-comm-utils/src/main/java/com/qmth/cqb/utils/word/DocxProcessUtil.java

@@ -581,14 +581,8 @@ public final class DocxProcessUtil {
         namespaces.stream().forEach(namespace -> {
             root.remove(namespace);
         });
-        List<org.dom4j.Element> elements = root.elements();
-        elements.stream().forEach(element ->{
-        	if("w:bookmarkStart".equals(element.getQualifiedName())||
-                	"w:bookmarkEnd".equals(element.getQualifiedName())){
-        		root.remove(element);
-        	}
-        });
-        return root.asXML();
+        String returnXml = CommonUtils.trimNoBlankSpace(root.asXML());
+        return returnXml;
     }
 
     /**
@@ -900,93 +894,17 @@ public final class DocxProcessUtil {
         XHTMLImporter.setHyperlinkStyle("Hyperlink");
         String wordMl = "";
         wordMLPackage.getMainDocumentPart().getContent().addAll(
-                    XHTMLImporter.convert( repairHtmlStr(html), TEMP_FILE_IMP) );
+                    XHTMLImporter.convert(html, TEMP_FILE_IMP) );
+        //转换完后就初始化image
+        initPkgImage(wordMLPackage);
         // 获取word文档中所有段落
         List<Object> pList = getAllElementFromObject(wordMLPackage.getMainDocumentPart(), P.class);
         for(Object p:pList){
-            wordMl += XmlUtils.marshaltoString(p);
+            wordMl += formatPWordMl(XmlUtils.marshaltoString(p));
         }
-//            wordMl = formatPWordMl(wordMl);
-        initPkgImage(wordMLPackage);
         return wordMl;
     }
 
-    public static String repairHtmlStr(String htmlStr)throws Exception{
-        htmlStr = htmlStr.trim();
-        if(htmlStr.toLowerCase().contains("<!doctype html ")){
-            int index1 = htmlStr.toLowerCase().indexOf("<!doctype html ");
-            int index2 = htmlStr.indexOf('>',index1 + 1);
-            htmlStr = htmlStr.substring(0, index1) + htmlStr.substring(index2 + 1);
-        }
-        while(htmlStr.toLowerCase().contains("<br ")){
-            int index1 = htmlStr.toLowerCase().indexOf("<br ");
-            int index2 = htmlStr.toLowerCase().indexOf(">",index1 + 1);
-            htmlStr = htmlStr.substring(0, index1) + "<br/>" + htmlStr.substring(index2 + 1);
-        }
-        while(htmlStr.toLowerCase().endsWith("<br>") || htmlStr.toLowerCase().endsWith("<br/>")){
-            if(htmlStr.toLowerCase().endsWith("<br>")){
-                htmlStr = htmlStr.substring(0, htmlStr.length()-"<br>".length());
-            }else if(htmlStr.toLowerCase().endsWith("<br/>")){
-                htmlStr = htmlStr.substring(0, htmlStr.length()-"<br/>".length());
-            }
-        }
-        htmlStr = htmlStr.replace("<br>", "<br/>").replace("<BR>", "<br/>");
-
-        {// Self-close META tags: insert the missing '/' before the closing '>'
-            int imgIndex = indexOfRegex(htmlStr,"<((meta)|(META)) ");
-            while(imgIndex > 0){ // NOTE(review): a tag starting at index 0 is skipped by this condition
-                int flag = htmlStr.indexOf(">", imgIndex);
-                if(htmlStr.charAt(flag - 1) != '/'){
-                    htmlStr = htmlStr.substring(0,flag) + "/" + htmlStr.substring(flag);
-                }
-                imgIndex = indexOfRegex(htmlStr,"<((meta)|(META)) ",flag);
-            }
-        }
-
-        {// Self-close img tags: insert the missing '/' before the closing '>'
-            int imgIndex = indexOfRegex(htmlStr,"<((img)|(IMG)) ");
-            while(imgIndex > 0){ // NOTE(review): a tag starting at index 0 is skipped by this condition
-                int flag = htmlStr.indexOf(">", imgIndex);
-                if(htmlStr.charAt(flag - 1) != '/'){
-                    htmlStr = htmlStr.substring(0,flag) + "/" + htmlStr.substring(flag);
-                }
-                imgIndex = indexOfRegex(htmlStr,"<((img)|(IMG)) ",flag);
-            }
-        }
-        return new String(htmlStr.getBytes("UTF-8"));
-    }
-
-    /**
-     * Finds the start position of the first regex match at or after the given index.
-     * @param str text to search
-     * @param regex regular expression
-     * @param fromIndex position in str where the search starts
-     * @return match start as an index into str, or -1 when nothing matches
-     */
-    public static int indexOfRegex(String str,String regex,int fromIndex){
-        int index = indexOfRegex(str.substring(fromIndex),regex);
-        if(index < 0){
-            return -1;
-        }
-        return fromIndex + index;
-    }
-
-    /**
-     * Finds the start position of the first substring that matches the regular expression.
-     * @param str text to search
-     * @param regex regular expression
-     * @return start index of the first match, or -1 when nothing matches
-     */
-    public static int indexOfRegex(String str,String regex){
-        Pattern p = Pattern.compile(regex);
-        Matcher m = p.matcher(str);
-        if(m.find()){
-            return m.start();
-        }else{
-            return -1;
-        }
-    }
-
     public static byte[] getWordBytesByQuestion(WordprocessingMLPackage wordMLPackage, List<String> wordXmls) throws Exception {
         RelationshipsPart mainRelationshipsPart = wordMLPackage.getMainDocumentPart().getRelationshipsPart();
         // 获取总的资源文件存储

+ 9 - 11
cqb-question-resource/src/main/java/com/qmth/cqb/question/service/impl/QuesServiceImpl.java

@@ -8,6 +8,7 @@ import com.qmth.cqb.question.dao.QuesPkgPathRepo;
 import com.qmth.cqb.question.model.*;
 import org.apache.commons.lang3.StringEscapeUtils;
 import org.apache.commons.lang3.StringUtils;
+import org.docx4j.Docx4J;
 import org.docx4j.openpackaging.exceptions.InvalidFormatException;
 import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
 import org.springframework.beans.factory.annotation.Autowired;
@@ -289,7 +290,6 @@ public class QuesServiceImpl implements QuesService{
             question.setQuesBodyWord(null);
             question.setQuesAnswerWord(null);
             question.setQuesAnswerAnalysisWord(null);
-//            question.setQuesPkg(new byte[0]);
             String newQuesBody = question.getQuesBody().replaceAll("<span>", "").replaceAll("</span>", "")
                     .replaceAll("###", "______");
             question.setQuesBody(newQuesBody);
@@ -308,9 +308,12 @@ public class QuesServiceImpl implements QuesService{
      */
     public void updateQuesWord(Question question) {
         try {
-        	if(wordMLPackage==null){
+        	if(wordMLPackage == null){
         		wordMLPackage = WordprocessingMLPackage.createPackage();
-        	}
+        	}else{
+                DocxProcessUtil.initTmpPackage(wordMLPackage);
+        	    wordMLPackage.getRelationshipsPart().remove();
+            }
             updateQuesWordUnit(wordMLPackage, question);
             List<Question> subQuesList = question.getSubQuestions();
             if (subQuesList != null && subQuesList.size() > 0) {
@@ -319,10 +322,10 @@ public class QuesServiceImpl implements QuesService{
                 }
             }
             byte [] pkgByte = DocxProcessUtil.getPkgByte(wordMLPackage);
-            QuestionPkgPath quesPkgPath = quesPkgPathRepo.save(new QuestionPkgPath(pkgByte));
-            question.setQuesPkgPathId(quesPkgPath.getId());
+            QuestionPkgPath quesPkgPath = quesPkgPathRepo.findFirstById(question.getQuesPkgPathId());
+            quesPkgPath.setQuesPkg(pkgByte);
+            quesPkgPathRepo.save(quesPkgPath);
             pkgByte = null;
-            quesPkgPath.setQuesPkg(null);
         } catch (Exception e) {
             e.printStackTrace();
         }
@@ -351,11 +354,6 @@ public class QuesServiceImpl implements QuesService{
                         CommonUtils.formatHtml(quesOption.getOptionBody())));
             }
         }
-        byte [] pkgByte = DocxProcessUtil.getPkgByte(wordMLPackage);
-        QuestionPkgPath quesPkgPath = quesPkgPathRepo.save(new QuestionPkgPath(pkgByte));
-        question.setQuesPkgPathId(quesPkgPath.getId());
-        pkgByte = null;
-        quesPkgPath.setQuesPkg(null);
     }
     
     private String makeQuesAnswerWord(String quesAnswer){