|
@@ -581,14 +581,8 @@ public final class DocxProcessUtil {
|
|
|
namespaces.stream().forEach(namespace -> {
|
|
|
root.remove(namespace);
|
|
|
});
|
|
|
- List<org.dom4j.Element> elements = root.elements();
|
|
|
- elements.stream().forEach(element ->{
|
|
|
- if("w:bookmarkStart".equals(element.getQualifiedName())||
|
|
|
- "w:bookmarkEnd".equals(element.getQualifiedName())){
|
|
|
- root.remove(element);
|
|
|
- }
|
|
|
- });
|
|
|
- return root.asXML();
|
|
|
+ String returnXml = CommonUtils.trimNoBlankSpace(root.asXML());
|
|
|
+ return returnXml;
|
|
|
}
|
|
|
|
|
|
/**
|
|
@@ -900,93 +894,17 @@ public final class DocxProcessUtil {
|
|
|
XHTMLImporter.setHyperlinkStyle("Hyperlink");
|
|
|
String wordMl = "";
|
|
|
wordMLPackage.getMainDocumentPart().getContent().addAll(
|
|
|
- XHTMLImporter.convert( repairHtmlStr(html), TEMP_FILE_IMP) );
|
|
|
+ XHTMLImporter.convert(html, TEMP_FILE_IMP) );
|
|
|
+ //转换完后就初始化image
|
|
|
+ initPkgImage(wordMLPackage);
|
|
|
// 获取word文档中所有段落
|
|
|
List<Object> pList = getAllElementFromObject(wordMLPackage.getMainDocumentPart(), P.class);
|
|
|
for(Object p:pList){
|
|
|
- wordMl += XmlUtils.marshaltoString(p);
|
|
|
+ wordMl += formatPWordMl(XmlUtils.marshaltoString(p));
|
|
|
}
|
|
|
-// wordMl = formatPWordMl(wordMl);
|
|
|
- initPkgImage(wordMLPackage);
|
|
|
return wordMl;
|
|
|
}
|
|
|
|
|
|
- public static String repairHtmlStr(String htmlStr)throws Exception{
|
|
|
- htmlStr = htmlStr.trim();
|
|
|
- if(htmlStr.toLowerCase().contains("<!doctype html ")){
|
|
|
- int index1 = htmlStr.toLowerCase().indexOf("<!doctype html ");
|
|
|
- int index2 = htmlStr.indexOf('>',index1 + 1);
|
|
|
- htmlStr = htmlStr.substring(0, index1) + htmlStr.substring(index2 + 1);
|
|
|
- }
|
|
|
- while(htmlStr.toLowerCase().contains("<br ")){
|
|
|
- int index1 = htmlStr.toLowerCase().indexOf("<br ");
|
|
|
- int index2 = htmlStr.toLowerCase().indexOf(">",index1 + 1);
|
|
|
- htmlStr = htmlStr.substring(0, index1) + "<br/>" + htmlStr.substring(index2 + 1);
|
|
|
- }
|
|
|
- while(htmlStr.toLowerCase().endsWith("<br>") || htmlStr.toLowerCase().endsWith("<br/>")){
|
|
|
- if(htmlStr.toLowerCase().endsWith("<br>")){
|
|
|
- htmlStr = htmlStr.substring(0, htmlStr.length()-"<br>".length());
|
|
|
- }else if(htmlStr.toLowerCase().endsWith("<br/>")){
|
|
|
- htmlStr = htmlStr.substring(0, htmlStr.length()-"<br/>".length());
|
|
|
- }
|
|
|
- }
|
|
|
- htmlStr = htmlStr.replace("<br>", "<br/>").replace("<BR>", "<br/>");
|
|
|
-
|
|
|
- {//补全META标签
|
|
|
- int imgIndex = indexOfRegex(htmlStr,"<((meta)|(META)) ");
|
|
|
- while(imgIndex > 0){
|
|
|
- int flag = htmlStr.indexOf(">", imgIndex);
|
|
|
- if(htmlStr.charAt(flag - 1) != '/'){
|
|
|
- htmlStr = htmlStr.substring(0,flag) + "/" + htmlStr.substring(flag);
|
|
|
- }
|
|
|
- imgIndex = indexOfRegex(htmlStr,"<((meta)|(META)) ",flag);
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
- {//补全img标签
|
|
|
- int imgIndex = indexOfRegex(htmlStr,"<((img)|(IMG)) ");
|
|
|
- while(imgIndex > 0){
|
|
|
- int flag = htmlStr.indexOf(">", imgIndex);
|
|
|
- if(htmlStr.charAt(flag - 1) != '/'){
|
|
|
- htmlStr = htmlStr.substring(0,flag) + "/" + htmlStr.substring(flag);
|
|
|
- }
|
|
|
- imgIndex = indexOfRegex(htmlStr,"<((img)|(IMG)) ",flag);
|
|
|
- }
|
|
|
- }
|
|
|
- return new String(htmlStr.getBytes("UTF-8"));
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 从指定的位置开始查找第一个匹配正则表达式的字符串的位置
|
|
|
- * @param str
|
|
|
- * @param regex 正则表达式
|
|
|
- * @param fromIndex 指定的起始位置
|
|
|
- * @return
|
|
|
- */
|
|
|
- public static int indexOfRegex(String str,String regex,int fromIndex){
|
|
|
- int index = indexOfRegex(str.substring(fromIndex),regex);
|
|
|
- if(index < 0){
|
|
|
- return -1;
|
|
|
- }
|
|
|
- return fromIndex + index;
|
|
|
- }
|
|
|
-
|
|
|
- /**
|
|
|
- * 查找第一个匹配正则表达式的字符串的位置
|
|
|
- * @param str
|
|
|
- * @param regex 正则表达式
|
|
|
- * @return
|
|
|
- */
|
|
|
- public static int indexOfRegex(String str,String regex){
|
|
|
- Pattern p = Pattern.compile(regex);
|
|
|
- Matcher m = p.matcher(str);
|
|
|
- if(m.find()){
|
|
|
- return m.start();
|
|
|
- }else{
|
|
|
- return -1;
|
|
|
- }
|
|
|
- }
|
|
|
-
|
|
|
public static byte[] getWordBytesByQuestion(WordprocessingMLPackage wordMLPackage, List<String> wordXmls) throws Exception {
|
|
|
RelationshipsPart mainRelationshipsPart = wordMLPackage.getMainDocumentPart().getRelationshipsPart();
|
|
|
// 获取总的资源文件存储
|