main.py 5.3 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148
  1. """
  2. 功能:将docx文件中的公式全部转为图片
  3. author:pengche
  4. time:2020/12/22
  5. """
  6. from docx.parts import document
  7. from docx.package import Package
  8. from docx.shared import Inches
  9. from docx.text.paragraph import Paragraph
  10. import re
  11. import os
  12. import logger
  13. import config
  14. import sys
  15. processedList=[]
  16. unprocessedList=[]
  17. from win32com import client as wc
# Helpers: saveasdocx() converts .doc to .docx; resource_path() below builds the access path for resource files.
  19. def saveasdocx(docpath):
  20. word = wc.Dispatch("Word.Application")
  21. doc = word.Documents.Open(docpath) # 打开word文件
  22. doc.SaveAs("{}x".format(docpath), 12) # 另存为后缀为".docx"的文件,其中参数12指docx文件
  23. doc.Close() # 关闭原来word文件
  24. word.Quit()
  25. return "{}x".format(docpath)
  26. def resource_path(relative_path):
  27. if getattr(sys, 'frozen', False): #是否Bundle Resource
  28. base_path = sys._MEIPASS
  29. else:
  30. base_path = os.path.abspath(".")
  31. return os.path.join(base_path, relative_path)
  32. def iter_block_items(parent):
  33. """
  34. 遍历节点
  35. """
  36. #document为主节点
  37. if type(parent)==document.DocumentPart:
  38. parent_elm = parent.element.body
  39. else:
  40. parent_elm=parent
  41. for child in parent_elm.iterchildren():
  42. #print(type(child))
  43. yield child
  44. for child2 in iter_block_items(child):
  45. yield child2
  46. def runCmd(cmd):
  47. p=os.popen(cmd).readlines()
  48. logger.info(p)
  49. def traversDoc(document_part):
  50. for para in document_part.paragraphs:
  51. #print(para)
  52. for run in para.runs:
  53. height = None
  54. width = None
  55. for children in iter_block_items(run.element):
  56. print(children)
  57. if children.tag == "{urn:schemas-microsoft-com:vml}shape":
  58. """'height:13.95pt;width:42.95pt;'"""
  59. shape = children.attrib["style"]
  60. #print("shape: "+shape)
  61. heightpattern = "height:([0-9]+(\.?[0-9]+)?)pt"
  62. widthpattern = "width:([0-9]+(\.?[0-9]+)?)pt"
  63. if re.search(heightpattern, shape):
  64. height = re.search(heightpattern, shape).group(1)
  65. if re.search(widthpattern, shape):
  66. width = re.search(widthpattern, shape).group(1)
  67. elif children.tag == "{urn:schemas-microsoft-com:vml}imagedata":
  68. if "{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id" in children.attrib:
  69. rid = children.attrib["{http://schemas.openxmlformats.org/officeDocument/2006/relationships}id"]
  70. processedList.append(rid)
  71. imagedata = document_part.part.rels[rid].target_part
  72. if imagedata.content_type !="image/x-wmf":
  73. continue
  74. with open("temp.wmf", "wb") as file:
  75. file.write(imagedata.blob)
  76. srcpath = os.getcwd() + os.sep + "temp.wmf"
  77. dstpath = os.getcwd() + os.sep + "temp.png"
  78. cmd = resource_path(os.path.join("tools","Project1.exe"))+" " + "\"" + srcpath + "\"" + " " + "\"" + dstpath + "\""
  79. logger.info(cmd)
  80. runCmd(cmd)
  81. run.clear()
  82. # 获取图片大小
  83. from PIL import Image
  84. img = Image.open("temp.png")
  85. if not height:
  86. height = img.size[1] /37/72
  87. else:
  88. height = float(height) / 72
  89. if not width:
  90. width = img.size[0] /37/72
  91. else:
  92. width = float(width) / 72
  93. run.add_picture("temp.png", width=Inches(width), height=Inches(height))
  94. def processfile(filepath):
  95. global processedList
  96. global unprocessedList
  97. processedList=[]
  98. unprocessedList=[]
  99. if filepath.endswith(".docx"):
  100. document = Package.open(filepath).main_document_part.document
  101. #写入unprocessList
  102. relslist=document.part.rels
  103. for rel in relslist:
  104. if relslist[rel].target_part.content_type=="'image/x-wmf'":
  105. unprocessedList.append(rel)
  106. traversDoc(document)
  107. #比较是否一致
  108. if unprocessedList.sort()==processedList.sort():
  109. logger.info(filepath+"处理完成没有遗漏")
  110. else:
  111. logger.info(filepath+"处理有遗漏")
  112. Lst=[]
  113. for m in unprocessedList:
  114. if m not in processedList:
  115. Lst.append(m)
  116. logger.info("遗漏为",Lst)
  117. if not os.path.exists("result"):
  118. os.makedirs("result")
  119. document.save("result/"+os.path.basename(filepath))
  120. elif filepath.endswith(".doc"):
  121. dstpath=saveasdocx(filepath)
  122. processfile(dstpath)
  123. else:
  124. logger.info(filepath + "是不支持的格式")
  125. def main():
  126. if os.path.isdir(config.filepath):
  127. for root, dirs, files in os.walk(config.filepath):
  128. for f in files:
  129. path=os.path.join(root, f)
  130. processfile(path)
  131. else:
  132. processfile(config.filepath)
  133. if __name__=="__main__":
  134. main()