|
@@ -27,10 +27,12 @@ import org.docx4j.jaxb.Context;
|
|
|
import org.docx4j.math.CTOMath;
|
|
|
import org.docx4j.openpackaging.exceptions.Docx4JException;
|
|
|
import org.docx4j.openpackaging.io3.stores.PartStore;
|
|
|
+import org.docx4j.openpackaging.io3.stores.ZipPartStore;
|
|
|
import org.docx4j.openpackaging.packages.WordprocessingMLPackage;
|
|
|
import org.docx4j.openpackaging.parts.Part;
|
|
|
import org.docx4j.openpackaging.parts.WordprocessingML.BinaryPartAbstractImage;
|
|
|
import org.docx4j.openpackaging.parts.WordprocessingML.MainDocumentPart;
|
|
|
+import org.docx4j.openpackaging.parts.WordprocessingML.NumberingDefinitionsPart;
|
|
|
import org.docx4j.openpackaging.parts.relationships.Namespaces;
|
|
|
import org.docx4j.openpackaging.parts.relationships.RelationshipsPart;
|
|
|
import org.docx4j.relationships.Relationship;
|
|
@@ -53,6 +55,8 @@ import javax.xml.transform.stream.StreamResult;
|
|
|
import javax.xml.transform.stream.StreamSource;
|
|
|
import java.io.*;
|
|
|
import java.util.*;
|
|
|
+import java.util.regex.Matcher;
|
|
|
+import java.util.regex.Pattern;
|
|
|
|
|
|
/**
|
|
|
* docx处理工具类 Created by songyue on 17/3/10.
|
|
@@ -667,11 +671,12 @@ public final class DocxProcessUtil {
|
|
|
// 以单个wordXml方式解析freemarker导出的文件
|
|
|
FlatOpcXmlImporter flatOpcXmlImporter = new FlatOpcXmlImporter(mainFile);
|
|
|
WordprocessingMLPackage wordMLPackage = (WordprocessingMLPackage) flatOpcXmlImporter.get();
|
|
|
+ RelationshipsPart relationshipsPart = wordMLPackage.getMainDocumentPart().getRelationshipsPart();
|
|
|
|
|
|
for (WordprocessingMLPackage wp : wordMLPackages) {
|
|
|
|
|
|
// 获取资源文件存储
|
|
|
- PartStore partStore = wp.getSourcePartStore();
|
|
|
+ ZipPartStore partStore = (ZipPartStore) wp.getSourcePartStore();
|
|
|
// 获取图片资源定义
|
|
|
RelationshipsPart rp = wp.getMainDocumentPart().getRelationshipsPart();
|
|
|
List<Relationship> rels = rp.getRelationshipsByType(Namespaces.IMAGE);
|
|
@@ -680,13 +685,15 @@ public final class DocxProcessUtil {
|
|
|
for (Relationship relationship : rels) {
|
|
|
parts.add(rp.getPart(relationship));
|
|
|
}
|
|
|
- // 添加资源文件存储
|
|
|
- wordMLPackage.setSourcePartStore(partStore);
|
|
|
// 添加资源文件定义
|
|
|
for (Part p : parts) {
|
|
|
- wordMLPackage.getMainDocumentPart().addTargetPart(p, RelationshipsPart.AddPartBehaviour.REUSE_EXISTING,
|
|
|
- p.getSourceRelationship().getId());
|
|
|
+ String relId = p.getSourceRelationships().get(0).getId();
|
|
|
+ if(!relationshipsPart.isRelIdOccupied(relId)){
|
|
|
+ copyImage(wordMLPackage,partStore,p);
|
|
|
+ }
|
|
|
}
|
|
|
+ // 添加资源文件存储
|
|
|
+// wordMLPackage.setSourcePartStore(partStore);
|
|
|
}
|
|
|
// 以word2007标准模式重新保存(zip包)
|
|
|
OutputStream os = new java.io.FileOutputStream(filePath);
|
|
@@ -694,6 +701,19 @@ public final class DocxProcessUtil {
|
|
|
IOUtils.closeQuietly(os);
|
|
|
}
|
|
|
|
|
|
+ /**
|
|
|
+ * word合并时复制图片
|
|
|
+ * @param wordMLPackage
|
|
|
+ * @param partStore
|
|
|
+ * @param p
|
|
|
+ * @throws Exception
|
|
|
+ */
|
|
|
+ public static void copyImage(WordprocessingMLPackage wordMLPackage, ZipPartStore partStore, Part p)throws Exception{
|
|
|
+ byte [] bytes = partStore.getByteArray(p.getPartName().getName().substring(1)).getBytes();
|
|
|
+ BinaryPartAbstractImage imagePart = BinaryPartAbstractImage.createImagePart(wordMLPackage, bytes);
|
|
|
+ imagePart.getRelLast().setId(p.getSourceRelationships().get(0).getId());
|
|
|
+ }
|
|
|
+
|
|
|
/**
|
|
|
* 获取word二进制数据(空文档,只有样式和资源)
|
|
|
*
|
|
@@ -748,7 +768,6 @@ public final class DocxProcessUtil {
|
|
|
.getRelationshipsByType(Namespaces.IMAGE);
|
|
|
// 同步替换资源ID
|
|
|
int index = 0;
|
|
|
- String rldHeader = getRldNum();
|
|
|
for (Relationship relationship : relationships) {
|
|
|
String tmpId = relationship.getId();
|
|
|
for (Object obj : blips) {
|
|
@@ -756,7 +775,7 @@ public final class DocxProcessUtil {
|
|
|
CTBlip ctBlip = (CTBlip) obj;
|
|
|
String tmpEmbed = ctBlip.getEmbed();
|
|
|
if (tmpId.equals(tmpEmbed)) {
|
|
|
- String tmp = rldHeader + (++index);
|
|
|
+ String tmp = getRldNum() + (++index);
|
|
|
relationship.setId(tmp);
|
|
|
ctBlip.setEmbed(tmp);
|
|
|
}
|
|
@@ -803,7 +822,7 @@ public final class DocxProcessUtil {
|
|
|
XHTMLImporterImpl XHTMLImporter = new XHTMLImporterImpl(wordMLPackage);
|
|
|
String wordMl = "";
|
|
|
wordMLPackage.getMainDocumentPart().getContent().addAll(
|
|
|
- XHTMLImporter.convert( html, null) );
|
|
|
+ XHTMLImporter.convert( repairHtmlStr(html), null) );
|
|
|
// 获取word文档中所有段落
|
|
|
List<Object> pList = getAllElementFromObject(wordMLPackage.getMainDocumentPart(), P.class);
|
|
|
for(Object p:pList){
|
|
@@ -814,6 +833,82 @@ public final class DocxProcessUtil {
|
|
|
return wordMl;
|
|
|
}
|
|
|
|
|
|
+ public static String repairHtmlStr(String htmlStr){
|
|
|
+ htmlStr = htmlStr.trim();
|
|
|
+ if(htmlStr.toLowerCase().contains("<!doctype html ")){
|
|
|
+ int index1 = htmlStr.toLowerCase().indexOf("<!doctype html ");
|
|
|
+ int index2 = htmlStr.indexOf('>',index1 + 1);
|
|
|
+ htmlStr = htmlStr.substring(0, index1) + htmlStr.substring(index2 + 1);
|
|
|
+ }
|
|
|
+ while(htmlStr.toLowerCase().contains("<br ")){
|
|
|
+ int index1 = htmlStr.toLowerCase().indexOf("<br ");
|
|
|
+ int index2 = htmlStr.toLowerCase().indexOf(">",index1 + 1);
|
|
|
+ htmlStr = htmlStr.substring(0, index1) + "<br/>" + htmlStr.substring(index2 + 1);
|
|
|
+ }
|
|
|
+ while(htmlStr.toLowerCase().endsWith("<br>") || htmlStr.toLowerCase().endsWith("<br/>")){
|
|
|
+ if(htmlStr.toLowerCase().endsWith("<br>")){
|
|
|
+ htmlStr = htmlStr.substring(0, htmlStr.length()-"<br>".length());
|
|
|
+ }else if(htmlStr.toLowerCase().endsWith("<br/>")){
|
|
|
+ htmlStr = htmlStr.substring(0, htmlStr.length()-"<br/>".length());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ htmlStr = htmlStr.replace("<br>", "<br/>").replace("<BR>", "<br/>");
|
|
|
+
|
|
|
+ {//补全META标签
|
|
|
+ int imgIndex = indexOfRegex(htmlStr,"<((meta)|(META)) ");
|
|
|
+ while(imgIndex > 0){
|
|
|
+ int flag = htmlStr.indexOf(">", imgIndex);
|
|
|
+ if(htmlStr.charAt(flag - 1) != '/'){
|
|
|
+ htmlStr = htmlStr.substring(0,flag) + "/" + htmlStr.substring(flag);
|
|
|
+ }
|
|
|
+ imgIndex = indexOfRegex(htmlStr,"<((meta)|(META)) ",flag);
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ {//补全img标签
|
|
|
+ int imgIndex = indexOfRegex(htmlStr,"<((img)|(IMG)) ");
|
|
|
+ while(imgIndex > 0){
|
|
|
+ int flag = htmlStr.indexOf(">", imgIndex);
|
|
|
+ if(htmlStr.charAt(flag - 1) != '/'){
|
|
|
+ htmlStr = htmlStr.substring(0,flag) + "/" + htmlStr.substring(flag);
|
|
|
+ }
|
|
|
+ imgIndex = indexOfRegex(htmlStr,"<((img)|(IMG)) ",flag);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ return htmlStr;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 从指定的位置开始查找第一个匹配正则表达式的字符串的位置
|
|
|
+ * @param str
|
|
|
+ * @param regex 正则表达式
|
|
|
+ * @param fromIndex 指定的起始位置
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static int indexOfRegex(String str,String regex,int fromIndex){
|
|
|
+ int index = indexOfRegex(str.substring(fromIndex),regex);
|
|
|
+ if(index < 0){
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+ return fromIndex + index;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
+ * 查找第一个匹配正则表达式的字符串的位置
|
|
|
+ * @param str
|
|
|
+ * @param regex 正则表达式
|
|
|
+ * @return
|
|
|
+ */
|
|
|
+ public static int indexOfRegex(String str,String regex){
|
|
|
+ Pattern p = Pattern.compile(regex);
|
|
|
+ Matcher m = p.matcher(str);
|
|
|
+ if(m.find()){
|
|
|
+ return m.start();
|
|
|
+ }else{
|
|
|
+ return -1;
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
public static void main(String[] args) {
|
|
|
System.out.println("123".matches("^\\d{1,}$"));
|
|
|
}
|