|
@@ -450,6 +450,7 @@ public final class DocxProcessUtil {
|
|
|
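* @return the trimmed text of every {@code <p>} element concatenated; or, when the input (after named entities such as {@code &nbsp;} are stripped and it is trimmed) does not start with {@code <p>}, that stripped input itself. NOTE(review): the value returned on a parse exception is not covered here - confirm against the catch block.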
|
|
|
*/
|
|
|
public static String getTextInHtml(String htmlStr){
|
|
|
+ htmlStr = htmlStr.replaceAll("\\&[a-zA-Z]{1,10};", "").trim();
|
|
|
if(!htmlStr.startsWith("<p>")){
|
|
|
return htmlStr;
|
|
|
}
|
|
@@ -457,8 +458,9 @@ public final class DocxProcessUtil {
|
|
|
org.jsoup.nodes.Document doc = Jsoup.parse(htmlStr);
|
|
|
String textStr = "";
|
|
|
Elements links = doc.select("p").removeAttr("img");
|
|
|
+
|
|
|
for (Element link : links) {
|
|
|
- textStr += link.text();
|
|
|
+ textStr += link.text().trim();
|
|
|
}
|
|
|
return textStr;
|
|
|
} catch (Exception e) {
|