|
@@ -496,12 +496,18 @@ public final class DocxProcessUtil {
|
|
|
// }
|
|
|
|
|
|
public static String getTextInHtml(String htmlStr) {
|
|
|
+ if(htmlStr==null) {
|
|
|
+ return htmlStr;
|
|
|
+ }
|
|
|
htmlStr = htmlStr.replaceAll("\\&[a-zA-Z]{1,10};", "").trim();
|
|
|
|
|
|
try {
|
|
|
org.jsoup.nodes.Document doc = Jsoup.parse(htmlStr);
|
|
|
StringBuilder textStr = new StringBuilder();
|
|
|
Elements links = doc.select("body");
|
|
|
+ if(links==null||links.size()==0) {
|
|
|
+ return htmlStr;
|
|
|
+ }
|
|
|
for (Node node : links.get(0).childNodes()) {
|
|
|
getTextByNode(textStr, node);
|
|
|
}
|