# examcloud-DevOps / python-tools
"""
   Purpose: convert every formula in a docx file into an image.
   author:pengche
   time:2020/12/22
"""
from docx.parts import document
from docx.package import Package
from docx.shared import Inches
from docx.text.paragraph import Paragraph
import re
import os
import logger
import config
import sys
# Relationship ids that traversDoc has visited; reset by processfile for every file.
processedList=[]
# Relationship ids that processfile collects before traversal, used for its
# post-run omission check; reset by processfile for every file.
unprocessedList=[]
from win32com import client as wc


# Helpers; resource_path below builds the access path of the bundled resource directory.

def saveasdocx(docpath):
    """Convert a legacy ``.doc`` file to ``.docx`` through Word automation.

    The converted file is written next to the original, with an ``x``
    appended to the file name.

    Args:
        docpath: Path of the ``.doc`` file to convert.

    Returns:
        ``docpath`` with ``x`` appended, i.e. the path of the new file.
    """
    # Word resolves relative paths against its own working directory, not
    # this process's, so hand it an absolute path.
    abs_path = os.path.abspath(docpath)
    word = wc.Dispatch("Word.Application")
    try:
        doc = word.Documents.Open(abs_path)
        try:
            # File format 12 selects the .docx format.
            doc.SaveAs("{}x".format(abs_path), 12)
        finally:
            doc.Close()
    finally:
        # Always shut Word down, even when opening or saving failed;
        # otherwise a hidden Word process is left running.
        word.Quit()
    return "{}x".format(docpath)

def resource_path(relative_path):
    """Return the location of a bundled resource.

    Args:
        relative_path: Path of the resource relative to the resource root.

    Returns:
        ``relative_path`` joined onto ``sys._MEIPASS`` when ``sys.frozen`` is
        set, otherwise joined onto the absolute current working directory.
    """
    if not getattr(sys, 'frozen', False):
        # Plain interpreter run: resources live under the working directory.
        return os.path.join(os.path.abspath("."), relative_path)
    # Frozen bundle: resources live under the directory named by _MEIPASS.
    return os.path.join(sys._MEIPASS, relative_path)

def iter_block_items(parent):
    """Yield every descendant of ``parent`` depth-first, parents before children.

    Args:
        parent: Either a ``document.DocumentPart`` (iteration then starts at
            its ``element.body``) or an element exposing ``iterchildren()``.

    Yields:
        Each child element, immediately followed by that child's own
        descendants.
    """
    # The document part is the main node; anything else is already an element.
    root = parent.element.body if type(parent) == document.DocumentPart else parent

    for node in root.iterchildren():
        yield node
        yield from iter_block_items(node)

def runCmd(cmd):
    """Run ``cmd`` via ``os.popen`` and log the lines it writes to stdout.

    Args:
        cmd: The complete command line as a single string.
    """
    # The context manager closes the pipe deterministically instead of
    # leaving the cleanup to garbage collection.
    with os.popen(cmd) as pipe:
        output = pipe.readlines()
    logger.info(output)

# Fully-qualified tag / attribute names of the VML nodes inspected by traversDoc.
_VML_SHAPE_TAG = "{urn:schemas-microsoft-com:vml}shape"
_VML_IMAGEDATA_TAG = "{urn:schemas-microsoft-com:vml}imagedata"
_REL_ID_ATTR = "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id"

# Extract the point sizes from a shape style such as
# 'height:13.95pt;width:42.95pt;'.  Raw strings: the previous non-raw
# literals contained the invalid escape sequence "\.".
_HEIGHT_PT_RE = re.compile(r"height:([0-9]+(\.?[0-9]+)?)pt")
_WIDTH_PT_RE = re.compile(r"width:([0-9]+(\.?[0-9]+)?)pt")


def traversDoc(document_part):
    """Replace WMF images referenced from VML ``imagedata`` nodes with PNGs.

    Every run of every paragraph in ``document_part.paragraphs`` is scanned.
    For each ``imagedata`` node whose relationship targets an
    ``image/x-wmf`` part, the blob is written to ``temp.wmf``, converted to
    ``temp.png`` by ``tools/Project1.exe``, the run is cleared and the PNG is
    inserted in its place.  The picture uses the point size found on the
    preceding VML ``shape`` style of the same run when available, otherwise
    a size derived from the PNG's pixel dimensions.

    Every visited relationship id is appended to the module-level
    ``processedList`` (before the content-type check, so non-WMF ids are
    recorded as well).

    Args:
        document_part: Object exposing ``paragraphs`` and ``part.rels``
            (processfile passes the ``document`` of the main document part).
    """
    for para in document_part.paragraphs:
        for run in para.runs:
            # Shape size in points (as strings), valid for this run only.
            height_pt = None
            width_pt = None
            for child in iter_block_items(run.element):
                if child.tag == _VML_SHAPE_TAG:
                    # A shape without a style attribute simply provides no size.
                    style = child.attrib.get("style", "")
                    height_match = _HEIGHT_PT_RE.search(style)
                    if height_match:
                        height_pt = height_match.group(1)
                    width_match = _WIDTH_PT_RE.search(style)
                    if width_match:
                        width_pt = width_match.group(1)
                elif child.tag == _VML_IMAGEDATA_TAG:
                    if _REL_ID_ATTR not in child.attrib:
                        continue
                    rid = child.attrib[_REL_ID_ATTR]
                    processedList.append(rid)
                    imagedata = document_part.part.rels[rid].target_part
                    if imagedata.content_type != "image/x-wmf":
                        continue

                    # Dump the WMF and let the external tool render it to PNG.
                    with open("temp.wmf", "wb") as file:
                        file.write(imagedata.blob)
                    srcpath = os.getcwd() + os.sep + "temp.wmf"
                    dstpath = os.getcwd() + os.sep + "temp.png"
                    converter = resource_path(os.path.join("tools", "Project1.exe"))
                    cmd = '{} "{}" "{}"'.format(converter, srcpath, dstpath)
                    logger.info(cmd)
                    runCmd(cmd)

                    # Read the pixel size and release the file handle at once,
                    # so temp.png is not kept open while later images are
                    # converted into the same file.
                    from PIL import Image
                    with Image.open("temp.png") as img:
                        px_width, px_height = img.size

                    # Points -> inches is /72.  The pixel fallback keeps the
                    # original /37/72 scaling.
                    # NOTE(review): confirm the intended unit of that fallback.
                    # Separate *_in names keep the point values intact, so a
                    # further image in the same run is not divided by 72 twice.
                    if height_pt:
                        height_in = float(height_pt) / 72
                    else:
                        height_in = px_height / 37 / 72
                    if width_pt:
                        width_in = float(width_pt) / 72
                    else:
                        width_in = px_width / 37 / 72

                    run.clear()
                    run.add_picture("temp.png", width=Inches(width_in), height=Inches(height_in))

def processfile(filepath):
    """Convert the formula images of one Word file and save it under ``result/``.

    ``.docx`` files are processed directly.  ``.doc`` files are first
    converted with ``saveasdocx`` and the resulting ``.docx`` is processed.
    Any other extension is logged as unsupported.  Extension matching is
    case-insensitive.

    The module-level ``processedList`` / ``unprocessedList`` are reset on
    every call.  After traversal, WMF relationship ids that traversDoc did
    not visit are logged as omissions.

    Args:
        filepath: Path of the file to process.
    """
    global processedList
    global unprocessedList
    processedList = []
    unprocessedList = []

    lowered = filepath.lower()
    if lowered.endswith(".docx"):
        doc = Package.open(filepath).main_document_part.document

        # Record every WMF image relationship before anything is modified.
        rels = doc.part.rels
        for rid in rels:
            rel = rels[rid]
            # External relationships (e.g. hyperlinks) have no target part.
            if rel.is_external:
                continue
            if rel.target_part.content_type == "image/x-wmf":
                unprocessedList.append(rid)

        traversDoc(doc)

        # Report the WMF relationships that traversDoc never reached.
        # (list.sort() returns None, so comparing two sort() results was
        # always true and the check could never fail.)
        visited = set(processedList)
        missing = [rid for rid in unprocessedList if rid not in visited]
        if not missing:
            logger.info(filepath+"处理完成没有遗漏")
        else:
            logger.info(filepath+"处理有遗漏")
            # Single string argument, like every other logger.info call here.
            logger.info("遗漏为" + str(missing))

        os.makedirs("result", exist_ok=True)
        doc.save(os.path.join("result", os.path.basename(filepath)))
    elif lowered.endswith(".doc"):
        dstpath = saveasdocx(filepath)
        processfile(dstpath)
    else:
        logger.info(filepath + "是不支持的格式")
def main():
    """Process ``config.filepath``: a single file, or every file under a directory."""
    target = config.filepath
    if not os.path.isdir(target):
        processfile(target)
        return

    # Directory: walk it recursively and hand each file to processfile.
    for dirpath, _subdirs, filenames in os.walk(target):
        for filename in filenames:
            processfile(os.path.join(dirpath, filename))


# Run the conversion only when executed as a script, not when imported.
if __name__ == "__main__":
    main()