htmlhandler.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183
  1. from bs4 import BeautifulSoup
  2. from bs4 import element
  3. import re
  4. import logger
  5. def splittagElement(blocklist,childrenelement):
  6. if type(childrenelement) == element.NavigableString:
  7. block = {
  8. "type": "text",
  9. "value": childrenelement,
  10. "playTime": None,
  11. "param": None
  12. }
  13. blocklist.append(block)
  14. elif type(childrenelement) == element.Tag:
  15. for child in childrenelement.children:
  16. if child.name == "a":
  17. playTime = None
  18. if "playtime" in child.attrs:
  19. playTime = child.attrs["playtime"]
  20. if "url" in child.attrs:
  21. block = {
  22. "type": "audio",
  23. "value": child.attrs["url"],
  24. "playTime": playTime,
  25. "param": None
  26. }
  27. blocklist.append(block)
  28. elif child.name == "img":
  29. if "width" in child.attrs and "height" in child.attrs:
  30. width=child.attrs["width"]
  31. height=child.attrs["height"]
  32. elif "style" in child.attrs:
  33. style=child.attrs["style"]
  34. width=re.search("width:([^;]*)",style).group(1)
  35. height=re.search("height:([^;]*)",style).group(1)
  36. else:
  37. width=None
  38. height=None
  39. if width or height:
  40. param={
  41. "width":width,
  42. "height":height
  43. }
  44. else:
  45. param=None
  46. block = {
  47. "type": "image",
  48. "value": child.attrs["src"],
  49. "playTime": None,
  50. "param":param
  51. }
  52. blocklist.append(block)
  53. else:
  54. splittagElement(blocklist,child)
  55. def splitQuestionBody(questionBody):
  56. """
  57. 切分quesitonBody
  58. """
  59. sectionlist=[]
  60. bodyhtml=BeautifulSoup(questionBody,features="html.parser")
  61. plist=bodyhtml.find_all("p")
  62. for p in plist:
  63. #遍历p标签
  64. section={
  65. "blocks":[]
  66. }
  67. splittagElement(section["blocks"],p)
  68. sectionlist.append(section)
  69. return sectionlist
  70. def splitQuesitonAnswer(quesitonanswer):
  71. """
  72. 切分问题标答
  73. """
  74. sectionlist = []
  75. #print(quesitonanswer)
  76. if quesitonanswer:
  77. bodyhtml = BeautifulSoup(quesitonanswer, features="html.parser")
  78. plist = bodyhtml.find_all("p")
  79. for p in plist:
  80. # 遍历p标签
  81. section = {
  82. "blocks": []
  83. }
  84. splittagElement(section["blocks"], p)
  85. sectionlist.append(section)
  86. else:
  87. sectionlist=None
  88. return sectionlist
  89. def splitQuesitonStudentAnswer(examrecordid,studentanswer,answerType,questionType):
  90. """
  91. 切分学生答案
  92. """
  93. #logger.info("examrecordid is %s,questionType is %s,answerType is %s,studentanswer is %s"%(str(examrecordid),questionType,answerType,studentanswer))
  94. if studentanswer:
  95. sectionlist = []
  96. if questionType=="ESSAY":
  97. if answerType=="SINGLE_AUDIO":
  98. answer={
  99. "blocks":[{
  100. "type": "audio",
  101. "value": studentanswer,
  102. "playTime": None,
  103. "param": None
  104. }]}
  105. sectionlist.append(answer)
  106. else:
  107. #取图片
  108. bodyhtml = BeautifulSoup(studentanswer, features="html.parser")
  109. blocks={
  110. "blocks":[]
  111. }
  112. for body in bodyhtml:
  113. if type(body) == element.NavigableString:
  114. textanswer = {
  115. "type": "text",
  116. "value": body,
  117. "playTime": None,
  118. "param": None
  119. }
  120. blocks["blocks"].append(textanswer)
  121. elif type(body)!=element.Comment:
  122. alist = body.find_all("a")
  123. if len(alist)>0:
  124. for atag in alist:
  125. imageanswer={
  126. "type": "image",
  127. "value": atag.attrs["href"],
  128. "playTime": None,
  129. "param": {
  130. "width": 200,
  131. "height": 200
  132. }
  133. }
  134. blocks["blocks"].append(imageanswer)
  135. else:
  136. textanswer = {
  137. "type": "text",
  138. "value": body.text,
  139. "playTime": None,
  140. "param": None
  141. }
  142. blocks["blocks"].append(textanswer)
  143. sectionlist.append(blocks)
  144. elif questionType=="FILL_UP":
  145. stuanswer=""
  146. for stu in studentanswer.split("##"):
  147. if stu =="":
  148. stuanswer=stuanswer+","+" "
  149. else:
  150. stuanswer=stuanswer+","+stu
  151. answer = {
  152. "blocks":[{
  153. "type": "text",
  154. "value": stuanswer[1:],
  155. "playTime": None,
  156. "param": None
  157. }]
  158. }
  159. sectionlist.append(answer)
  160. else:
  161. answer = {
  162. "blocks": [{
  163. "type": "text",
  164. "value": studentanswer,
  165. "playTime": None,
  166. "param": None
  167. }]
  168. }
  169. sectionlist.append(answer)
  170. print(sectionlist)
  171. else:
  172. sectionlist = None
  173. return sectionlist