""" 功能:将docx文件中的公式全部转为图片 author:pengche time:2020/12/22 """ from docx.parts import document from docx.package import Package from docx.shared import Inches from docx.text.paragraph import Paragraph import re import os import logger import config import sys processedList=[] unprocessedList=[] from win32com import client as wc #生成资源文件目录访问路径 def saveasdocx(docpath): word = wc.Dispatch("Word.Application") doc = word.Documents.Open(docpath) # 打开word文件 doc.SaveAs("{}x".format(docpath), 12) # 另存为后缀为".docx"的文件,其中参数12指docx文件 doc.Close() # 关闭原来word文件 word.Quit() return "{}x".format(docpath) def resource_path(relative_path): if getattr(sys, 'frozen', False): #是否Bundle Resource base_path = sys._MEIPASS else: base_path = os.path.abspath(".") return os.path.join(base_path, relative_path) def iter_block_items(parent): """ 遍历节点 """ #document为主节点 if type(parent)==document.DocumentPart: parent_elm = parent.element.body else: parent_elm=parent for child in parent_elm.iterchildren(): #print(type(child)) yield child for child2 in iter_block_items(child): yield child2 def runCmd(cmd): p=os.popen(cmd).readlines() logger.info(p) def traversDoc(document_part): for para in document_part.paragraphs: #print(para) for run in para.runs: height = None width = None for children in iter_block_items(run.element): print(children) if children.tag == "{urn:schemas-microsoft-com:vml}shape": """'height:13.95pt;width:42.95pt;'""" shape = children.attrib["style"] #print("shape: "+shape) heightpattern = "height:([0-9]+(\.?[0-9]+)?)pt" widthpattern = "width:([0-9]+(\.?[0-9]+)?)pt" if re.search(heightpattern, shape): height = re.search(heightpattern, shape).group(1) if re.search(widthpattern, shape): width = re.search(widthpattern, shape).group(1) elif children.tag == "{urn:schemas-microsoft-com:vml}imagedata": if "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id" in children.attrib: rid = children.attrib["{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id"] processedList.append(rid) imagedata = document_part.part.rels[rid].target_part if imagedata.content_type !="image/x-wmf": continue with open("temp.wmf", "wb") as file: file.write(imagedata.blob) srcpath = os.getcwd() + os.sep + "temp.wmf" dstpath = os.getcwd() + os.sep + "temp.png" cmd = resource_path(os.path.join("tools","Project1.exe"))+" " + "\"" + srcpath + "\"" + " " + "\"" + dstpath + "\"" logger.info(cmd) runCmd(cmd) run.clear() # 获取图片大小 from PIL import Image img = Image.open("temp.png") if not height: height = img.size[1] /37/72 else: height = float(height) / 72 if not width: width = img.size[0] /37/72 else: width = float(width) / 72 run.add_picture("temp.png", width=Inches(width), height=Inches(height)) def processfile(filepath): global processedList global unprocessedList processedList=[] unprocessedList=[] if filepath.endswith(".docx"): document = Package.open(filepath).main_document_part.document #写入unprocessList relslist=document.part.rels for rel in relslist: if relslist[rel].target_part.content_type=="'image/x-wmf'": unprocessedList.append(rel) traversDoc(document) #比较是否一致 if unprocessedList.sort()==processedList.sort(): logger.info(filepath+"处理完成没有遗漏") else: logger.info(filepath+"处理有遗漏") Lst=[] for m in unprocessedList: if m not in processedList: Lst.append(m) logger.info("遗漏为",Lst) if not os.path.exists("result"): os.makedirs("result") document.save("result/"+os.path.basename(filepath)) elif filepath.endswith(".doc"): dstpath=saveasdocx(filepath) processfile(dstpath) else: logger.info(filepath + "是不支持的格式") def main(): if os.path.isdir(config.filepath): for root, dirs, files in os.walk(config.filepath): for f in files: path=os.path.join(root, f) processfile(path) else: processfile(config.filepath) if __name__=="__main__": main()