1 jaar geleden · 8aeca3e30f
--- a/examcloud-core-questions-base/src/main/java/cn/com/qmth/examcloud/core/questions/base/word/DocxProcessUtil.java
+++ b/examcloud-core-questions-base/src/main/java/cn/com/qmth/examcloud/core/questions/base/word/DocxProcessUtil.java
@@ -12,6 +12,7 @@ import net.sourceforge.jeuclid.context.LayoutContextImpl;
 
				 import net.sourceforge.jeuclid.context.StyleAttributeLayoutContext;
			
 
				 import net.sourceforge.jeuclid.converter.Converter;
			
 
				 import org.apache.commons.codec.binary.Base64;
			
 
				+import org.apache.commons.collections4.CollectionUtils;
			
 
				 import org.apache.commons.io.FileUtils;
			
 
				 import org.apache.commons.io.IOUtils;
			
 
				 import org.apache.commons.lang3.StringUtils;
			
@@ -45,6 +46,8 @@ import org.dom4j.Namespace;
 
				 import org.dom4j.io.SAXReader;
			
 
				 import org.jsoup.Jsoup;
			
 
				 import org.jsoup.nodes.Element;
			
 
				+import org.jsoup.nodes.Node;
			
 
				+import org.jsoup.nodes.TextNode;
			
 
				 import org.jsoup.select.Elements;
			
 
				 import org.slf4j.Logger;
			
 
				 import org.slf4j.LoggerFactory;
			
@@ -469,28 +472,61 @@ public final class DocxProcessUtil {
 
				      * @param htmlStr
			
 
				      * @return
			
 
				      */
			
 
				+//    public static String getTextInHtml(String htmlStr) {
			
 
				+//        htmlStr = htmlStr.replaceAll("\\&[a-zA-Z]{1,10};", "").trim();
			
 
				+//        if (!htmlStr.startsWith("<p>")) {
			
 
				+//            return htmlStr;
			
 
				+//        }
			
 
				+//
			
 
				+//        try {
			
 
				+//            org.jsoup.nodes.Document doc = Jsoup.parse(htmlStr);
			
 
				+//            StringBuilder textStr = new StringBuilder();
			
 
				+//            Elements links = doc.select("p").removeAttr("img");
			
 
				+//
			
 
				+//            for (Element link : links) {
			
 
				+//                textStr.append(link.text().trim());
			
 
				+//            }
			
 
				+//
			
 
				+//            return textStr.toString();
			
 
				+//        } catch (Exception e) {
			
 
				+//            LOG.error(e.getMessage(), e);
			
 
				+//        }
			
 
				+//
			
 
				+//        return htmlStr;
			
 
				+//    }
			
 
				+    
			
 
				     public static String getTextInHtml(String htmlStr) {
			
 
				-        htmlStr = htmlStr.replaceAll("\\&[a-zA-Z]{1,10};", "").trim();
			
 
				-        if (!htmlStr.startsWith("<p>")) {
			
 
				-            return htmlStr;
			
 
				-        }
			
 
				-
			
 
				-        try {
			
 
				-            org.jsoup.nodes.Document doc = Jsoup.parse(htmlStr);
			
 
				-            StringBuilder textStr = new StringBuilder();
			
 
				-            Elements links = doc.select("p").removeAttr("img");
			
 
				-
			
 
				-            for (Element link : links) {
			
 
				-                textStr.append(link.text().trim());
			
 
				-            }
			
 
				-
			
 
				-            return textStr.toString();
			
 
				-        } catch (Exception e) {
			
 
				-            LOG.error(e.getMessage(), e);
			
 
				-        }
			
 
				-
			
 
				-        return htmlStr;
			
 
				-    }
			
 
				+        // A missing fragment is treated as empty text instead of failing on replaceAll.
			
 
				+        if (htmlStr == null) {
			
 
				+            return "";
			
 
				+        }
			
 
				+
			
 
				+        // Strip named entities of the form &name; (1-10 letters) before parsing,
			
 
				+        // so they never reach the extracted text.
			
 
				+        htmlStr = htmlStr.replaceAll("\\&[a-zA-Z]{1,10};", "").trim();
			
 
				+
			
 
				+        // The jsoup HTML parser normalises its output to contain a <body>, so body()
			
 
				+        // stands in for select("body").get(0). No try/catch is needed: nothing below
			
 
				+        // throws a checked exception, and re-wrapping runtime failures added no context.
			
 
				+        org.jsoup.nodes.Document doc = Jsoup.parse(htmlStr);
			
 
				+        StringBuilder textStr = new StringBuilder();
			
 
				+        for (Node node : doc.body().childNodes()) {
			
 
				+            getTextByNode(textStr, node);
			
 
				+        }
			
 
				+        return textStr.toString();
			
 
				+    }
			
 
				+
			
 
				+    /**
			
 
				+     * Appends the text of every {@link TextNode} found in {@code node} and its
			
 
				+     * descendants to {@code textStr}, in document order.
			
 
				+     * Nodes that are neither text nor elements contribute nothing.
			
 
				+     *
			
 
				+     * @param textStr buffer that receives the extracted text
			
 
				+     * @param node    root of the subtree to read; {@code null} is ignored
			
 
				+     */
			
 
				+    private static void getTextByNode(StringBuilder textStr, Node node) {
			
 
				+        if (node == null) {
			
 
				+            return;
			
 
				+        }
			
 
				+
			
 
				+        // Explicit stack instead of recursion, so deeply nested markup cannot
			
 
				+        // overflow the call stack.
			
 
				+        java.util.Deque<Node> pending = new java.util.ArrayDeque<>();
			
 
				+        pending.push(node);
			
 
				+        while (!pending.isEmpty()) {
			
 
				+            Node current = pending.pop();
			
 
				+            if (current instanceof TextNode) {
			
 
				+                textStr.append(((TextNode) current).text());
			
 
				+            } else if (current instanceof Element) {
			
 
				+                // Push children last-to-first so they are popped in document order.
			
 
				+                java.util.List<Node> children = current.childNodes();
			
 
				+                for (int i = children.size() - 1; i >= 0; i--) {
			
 
				+                    pending.push(children.get(i));
			
 
				+                }
			
 
				+            }
			
 
				+        }
			
 
				+    }
			
 
				 
			
 
				     /**
			
 
				      * 格式化转换后的html(html临时文件)