xiatian 1 år sedan
förälder
incheckning
88ea59b457

+ 2 - 34
examcloud-core-questions-base/src/main/java/cn/com/qmth/examcloud/core/questions/base/word/DocxProcessUtil.java

@@ -12,7 +12,6 @@ import net.sourceforge.jeuclid.context.LayoutContextImpl;
 import net.sourceforge.jeuclid.context.StyleAttributeLayoutContext;
 import net.sourceforge.jeuclid.converter.Converter;
 import org.apache.commons.codec.binary.Base64;
-import org.apache.commons.collections4.CollectionUtils;
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
 import org.apache.commons.lang3.StringUtils;
@@ -45,9 +44,6 @@ import org.dom4j.DocumentHelper;
 import org.dom4j.Namespace;
 import org.dom4j.io.SAXReader;
 import org.jsoup.Jsoup;
-import org.jsoup.nodes.Element;
-import org.jsoup.nodes.Node;
-import org.jsoup.nodes.TextNode;
 import org.jsoup.select.Elements;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
@@ -501,38 +497,10 @@ public final class DocxProcessUtil {
     	}
 		htmlStr = htmlStr.replaceAll("\\&[a-zA-Z]{1,10};", "").trim();
 
-		try {
-			org.jsoup.nodes.Document doc = Jsoup.parse(htmlStr);
-			StringBuilder textStr = new StringBuilder();
-			Elements links = doc.select("body");
-			if(links==null||links.size()==0) {
-				return htmlStr;
-			}
-			for (Node node : links.get(0).childNodes()) {
-				getTextByNode(textStr, node);
-			}
-			return textStr.toString();
-		} catch (Exception e) {
-			throw new RuntimeException(e);
-		}
+		org.jsoup.nodes.Document doc = Jsoup.parse(htmlStr);
+		return doc.text();
 	}
 
-	private static void getTextByNode(StringBuilder textStr, Node node) {
-
-		if (node instanceof TextNode) {
-			TextNode tn = (TextNode) node;
-			textStr.append(tn.text());
-		} else if (node instanceof Element) {
-			Element e = (Element) node;
-			if (CollectionUtils.isNotEmpty(e.childNodes())) {
-				for (Node snode : e.childNodes()) {
-					getTextByNode(textStr, snode);
-				}
-			}
-		}else {
-//			throw new StatusException("解析出错:"+node);
-		}
-	}
 
     /**
      * 格式化转换后的html(html临时文件)