"""
Purpose: convert every formula in a docx file into an image.
author: pengche
time: 2020/12/22
"""
- from docx.parts import document
- from docx.package import Package
- from docx.shared import Inches
- from docx.text.paragraph import Paragraph
- import re
- import os
- import logger
- import config
- import sys
# Relationship ids gathered while a file is handled: processedList receives
# the ids traversDoc() finds on VML imagedata elements, unprocessedList the
# ids processfile() selects from the document's relationships.  Both names
# are rebound to fresh lists at the start of every processfile() call.
processedList, unprocessedList = [], []
- from win32com import client as wc
- #生成资源文件目录访问路径
def saveasdocx(docpath):
    """Convert a legacy ``.doc`` file to ``.docx`` through Word COM automation.

    The converted document is written next to the source file, using the
    source name with an ``x`` appended.

    Args:
        docpath: Path of the ``.doc`` file to convert.

    Returns:
        The absolute path of the ``.docx`` file that was written.
    """
    # Word is a separate process with its own working directory, so a
    # relative path would not reliably point at the same file there.
    srcpath = os.path.abspath(docpath)
    dstpath = "{}x".format(srcpath)
    word = wc.Dispatch("Word.Application")
    try:
        doc = word.Documents.Open(srcpath)
        try:
            doc.SaveAs(dstpath, 12)  # 12 selects the .docx file format
        finally:
            doc.Close()
    finally:
        # Always shut Word down, otherwise a failed conversion leaves a
        # hidden WINWORD process behind.
        word.Quit()
    return dstpath
def resource_path(relative_path):
    """Return the location of a resource file shipped with the program.

    Args:
        relative_path: Path of the resource relative to the base directory.

    Returns:
        ``relative_path`` joined onto the base directory.  The base is the
        bundle directory when running frozen (``sys.frozen`` is set) and the
        current working directory otherwise.
    """
    if getattr(sys, 'frozen', False):
        # sys._MEIPASS is only present in PyInstaller bundles; fall back to
        # the executable's directory so that other freezers, which set
        # sys.frozen without it, do not raise AttributeError here.
        base_path = getattr(sys, '_MEIPASS', os.path.dirname(sys.executable))
    else:
        base_path = os.path.abspath(".")
    return os.path.join(base_path, relative_path)
def iter_block_items(parent):
    """Yield every descendant element of *parent*, depth first.

    Args:
        parent: Either a ``DocumentPart`` (the walk then starts at its body
            element) or an XML element that provides ``iterchildren()``.

    Yields:
        Each child element, immediately followed by that child's own
        descendants.
    """
    # isinstance, unlike an exact type comparison, also accepts subclasses
    # of DocumentPart.
    if isinstance(parent, document.DocumentPart):
        parent_elm = parent.element.body
    else:
        parent_elm = parent
    for child in parent_elm.iterchildren():
        yield child
        yield from iter_block_items(child)
def runCmd(cmd):
    """Run *cmd* through the shell and log what it printed.

    Args:
        cmd: Complete command line; handed to ``os.popen`` unchanged.
    """
    pipe = os.popen(cmd)
    try:
        output = pipe.readlines()
    finally:
        # close() waits for the command to finish and returns None when it
        # exited with status 0.
        status = pipe.close()
    logger.info(output)
    if status is not None:
        # Make a failed command visible instead of dropping its status.
        logger.info("command exited with status {}: {}".format(status, cmd))
_VML_SHAPE_TAG = "{urn:schemas-microsoft-com:vml}shape"
_VML_IMAGEDATA_TAG = "{urn:schemas-microsoft-com:vml}imagedata"
_REL_ID_ATTR = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id"


def _vml_length_pt(style, name):
    """Return the number in ``<name>:<number>pt`` from a VML style string, or None."""
    # Example style value: 'height:13.95pt;width:42.95pt;'
    match = re.search(name + r":([0-9]+(\.?[0-9]+)?)pt", style)
    return match.group(1) if match else None


def _replace_run_with_png(run, wmf_blob, height_pt, width_pt):
    """Convert WMF bytes to PNG with the bundled tool and make it the run's only content."""
    srcpath = os.path.join(os.getcwd(), "temp.wmf")
    dstpath = os.path.join(os.getcwd(), "temp.png")
    with open(srcpath, "wb") as file:
        file.write(wmf_blob)
    # Drop the previous conversion result so that a failed conversion cannot
    # silently insert the preceding formula's picture.
    if os.path.exists(dstpath):
        os.remove(dstpath)
    # The executable path is quoted as well: it may contain spaces.
    cmd = '"{}" "{}" "{}"'.format(
        resource_path(os.path.join("tools", "Project1.exe")), srcpath, dstpath)
    logger.info(cmd)
    runCmd(cmd)

    from PIL import Image  # imported only when a picture is actually handled
    # Close the file again right away so the next conversion can replace it.
    with Image.open(dstpath) as img:
        px_width, px_height = img.size

    # Sizes taken from the VML style are in points (72 per inch).
    # NOTE(review): the pixel fallback (px / 37 / 72) is kept exactly as it
    # was; the unit conversion looks questionable - confirm the intent.
    if height_pt:
        height_in = float(height_pt) / 72
    else:
        height_in = px_height / 37 / 72
    if width_pt:
        width_in = float(width_pt) / 72
    else:
        width_in = px_width / 37 / 72

    # NOTE(review): clear() removes everything else the run contained, so a
    # run holding several pictures ends up with the last one only.
    run.clear()
    run.add_picture(dstpath, width=Inches(width_in), height=Inches(height_in))


def traversDoc(document_part):
    """Replace WMF pictures referenced by VML imagedata elements with PNG pictures.

    Walks every run of every paragraph in ``document_part.paragraphs``.  The
    ``height``/``width`` point values of a VML shape's style are remembered
    for the run.  For each imagedata element carrying a relationship id, the
    id is appended to the module-level ``processedList``; when the related
    part has content type ``image/x-wmf`` it is converted through the
    ``tools/Project1.exe`` helper and the run's content is replaced by the
    resulting picture.

    Args:
        document_part: Object exposing ``paragraphs`` and ``part.rels``.
    """
    for para in document_part.paragraphs:
        for run in para.runs:
            height = None
            width = None
            # Snapshot the descendants first: replacing the run's content
            # below changes the tree that is being walked.
            for element in list(iter_block_items(run.element)):
                if element.tag == _VML_SHAPE_TAG:
                    # A shape without a style attribute simply has no size.
                    style = element.attrib.get("style", "")
                    height = _vml_length_pt(style, "height") or height
                    width = _vml_length_pt(style, "width") or width
                elif element.tag == _VML_IMAGEDATA_TAG:
                    if _REL_ID_ATTR not in element.attrib:
                        continue
                    rid = element.attrib[_REL_ID_ATTR]
                    processedList.append(rid)
                    imagedata = document_part.part.rels[rid].target_part
                    if imagedata.content_type != "image/x-wmf":
                        continue
                    _replace_run_with_png(run, imagedata.blob, height, width)
def processfile(filepath):
    """Convert the WMF pictures of one Word file and save the result.

    ``.docx`` files are opened, passed to ``traversDoc`` and saved under
    ``result/`` with their original base name.  ``.doc`` files are first
    converted with ``saveasdocx`` and the converted file is then processed.
    Any other extension is only logged as unsupported.  The extension check
    ignores letter case.

    The module-level ``processedList`` and ``unprocessedList`` are reset at
    the start of every call.

    Args:
        filepath: Path of the file to process.
    """
    global processedList
    global unprocessedList
    processedList = []
    unprocessedList = []
    lowered = filepath.lower()
    if lowered.endswith(".docx"):
        # Named "doc" so the imported docx.parts.document module is not shadowed.
        doc = Package.open(filepath).main_document_part.document
        # Record every WMF image the document references.
        for rid, rel in doc.part.rels.items():
            # External relationships (hyperlinks and the like) have no
            # target part; asking for one raises.
            if rel.is_external:
                continue
            if rel.target_part.content_type == "image/x-wmf":
                unprocessedList.append(rid)
        traversDoc(doc)
        # Report the WMF images that traversDoc() never came across.
        missed = [rid for rid in unprocessedList if rid not in processedList]
        if not missed:
            logger.info(filepath+"处理完成没有遗漏")
        else:
            logger.info(filepath+"处理有遗漏")
            logger.info("遗漏为{}".format(missed))
        os.makedirs("result", exist_ok=True)
        doc.save("result/"+os.path.basename(filepath))
    elif lowered.endswith(".doc"):
        processfile(saveasdocx(filepath))
    else:
        logger.info(filepath + "是不支持的格式")
def main():
    """Process ``config.filepath``.

    A directory is walked recursively and every file found in it is handed
    to ``processfile``; anything else is handed to ``processfile`` directly.
    """
    target = config.filepath
    if not os.path.isdir(target):
        processfile(target)
        return
    for folder, _subdirs, names in os.walk(target):
        for name in names:
            processfile(os.path.join(folder, name))


if __name__ == "__main__":
    main()
|