|
@@ -0,0 +1,257 @@
|
|
|
+package cn.com.qmth.examcloud.support.handler.richtext2;
|
|
|
+
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
+import org.jsoup.Jsoup;
|
|
|
+import org.jsoup.nodes.Document;
|
|
|
+import org.jsoup.nodes.Element;
|
|
|
+import org.jsoup.nodes.Node;
|
|
|
+import org.jsoup.nodes.TextNode;
|
|
|
+import org.slf4j.Logger;
|
|
|
+import org.slf4j.LoggerFactory;
|
|
|
+import org.springframework.util.CollectionUtils;
|
|
|
+
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.List;
|
|
|
+import java.util.regex.Matcher;
|
|
|
+import java.util.regex.Pattern;
|
|
|
+
|
|
|
+/**
|
|
|
+ * HTML结构转换为“富文本”JSON结构
|
|
|
+ * 注:特殊定制,只处理业务所需的标签和结构,如:试题题干内容
|
|
|
+ *
|
|
|
+ * @author: Deason
|
|
|
+ * @since: 2021/9/10
|
|
|
+ */
|
|
|
+public class RichTextConverter implements TagConstant {
|
|
|
+
|
|
|
+ private final static Logger log = LoggerFactory.getLogger(RichTextConverter.class);
|
|
|
+
|
|
|
+ public static Result parse(String html) {
|
|
|
+ if (StringUtils.isEmpty(html)) {
|
|
|
+ return new Result();
|
|
|
+ }
|
|
|
+
|
|
|
+ Document document = Jsoup.parse(html);
|
|
|
+ return parse(document);
|
|
|
+ }
|
|
|
+
|
|
|
+ public static Result parse(Document document) {
|
|
|
+ Element body = document.body();
|
|
|
+ List<Node> nodes = body.childNodes();
|
|
|
+
|
|
|
+ Result result = new Result();
|
|
|
+ result.setSections(splitSections(nodes));
|
|
|
+ return result;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static List<Section> splitSections(List<Node> nodes) {
|
|
|
+ if (CollectionUtils.isEmpty(nodes)) {
|
|
|
+ return new ArrayList<>();
|
|
|
+ }
|
|
|
+
|
|
|
+ List<List<Node>> groups = new ArrayList<>();
|
|
|
+
|
|
|
+ if (nodes.size() == 1) {
|
|
|
+ groups.add(nodes);
|
|
|
+ } else {
|
|
|
+ List<Node> tempNodes = new ArrayList<>();
|
|
|
+
|
|
|
+ for (int n = 0; n < nodes.size(); n++) {
|
|
|
+ Node node = nodes.get(n);
|
|
|
+
|
|
|
+ if (TAG_P.equals(node.nodeName())) {
|
|
|
+ if (!CollectionUtils.isEmpty(tempNodes)) {
|
|
|
+ // 先将“当前p标签”之前的元素作为一组
|
|
|
+ groups.add(new ArrayList<>(tempNodes));
|
|
|
+ tempNodes.clear();
|
|
|
+ }
|
|
|
+
|
|
|
+ // 再将“当前p标签”的元素作为一组
|
|
|
+ tempNodes.add(node);
|
|
|
+ groups.add(new ArrayList<>(tempNodes));
|
|
|
+ tempNodes.clear();
|
|
|
+ } else {
|
|
|
+ // 非“p标签”元素
|
|
|
+ tempNodes.add(node);
|
|
|
+ }
|
|
|
+
|
|
|
+ if (n == (nodes.size() - 1) && !CollectionUtils.isEmpty(tempNodes)) {
|
|
|
+ // 最后一个元素 且 非“p标签”元素
|
|
|
+ groups.add(tempNodes);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ List<Section> sections = new ArrayList<>();
|
|
|
+ for (List<Node> curNodes : groups) {
|
|
|
+ List<Block> blocks = new ArrayList<>();
|
|
|
+ splitBlocks(curNodes, blocks);
|
|
|
+ if (blocks.isEmpty()) {
|
|
|
+ continue;
|
|
|
+ }
|
|
|
+
|
|
|
+ Section section = new Section();
|
|
|
+ section.setBlocks(blocks);
|
|
|
+ sections.add(section);
|
|
|
+ }
|
|
|
+ groups.clear();
|
|
|
+
|
|
|
+ return sections;
|
|
|
+ }
|
|
|
+
|
|
|
+ private static void splitBlocks(List<Node> nodes, List<Block> blocks) {
|
|
|
+ if (nodes.isEmpty()) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ for (Node node : nodes) {
|
|
|
+ if (node instanceof Element) {
|
|
|
+ Element element = (Element) node;
|
|
|
+
|
|
|
+ if (TAG_IMG.equals(node.nodeName())) {
|
|
|
+ // 处理图片
|
|
|
+ processImg(element, blocks);
|
|
|
+ } else if (TAG_A.equals(node.nodeName())) {
|
|
|
+ // 处理音频
|
|
|
+ processAudio(element, blocks);
|
|
|
+ } else if (TAG_B.equals(node.nodeName()) || TAG_STRONG.equals(node.nodeName())) {
|
|
|
+ // 处理文本 加粗
|
|
|
+ Param param = new Param();
|
|
|
+ param.setBold(true);
|
|
|
+ processText(element.text(), param, blocks);
|
|
|
+ } else if (TAG_I.equals(node.nodeName()) || TAG_EM.equals(node.nodeName())) {
|
|
|
+ // 处理文本 斜体
|
|
|
+ Param param = new Param();
|
|
|
+ param.setItalic(true);
|
|
|
+ processText(element.text(), param, blocks);
|
|
|
+ } else if (TAG_U.equals(node.nodeName())) {
|
|
|
+ // 处理文本 下划线
|
|
|
+ Param param = new Param();
|
|
|
+ param.setUnderline(true);
|
|
|
+ processText(element.text(), param, blocks);
|
|
|
+ } else if (TAG_SUP.equals(node.nodeName())) {
|
|
|
+ // 处理文本 上标
|
|
|
+ Param param = new Param();
|
|
|
+ param.setSup(true);
|
|
|
+ processText(element.text(), param, blocks);
|
|
|
+ } else if (TAG_SUB.equals(node.nodeName())) {
|
|
|
+ // 处理文本 下标
|
|
|
+ Param param = new Param();
|
|
|
+ param.setSub(true);
|
|
|
+ processText(element.text(), param, blocks);
|
|
|
+ } else {
|
|
|
+ splitBlocks(node.childNodes(), blocks);
|
|
|
+ }
|
|
|
+ } else {
|
|
|
+ if (node instanceof TextNode) {
|
|
|
+ TextNode element = (TextNode) node;
|
|
|
+ // 处理文本
|
|
|
+ processText(element.text(), null, blocks);
|
|
|
+ } else {
|
|
|
+ log.debug("Ignore node {}", node.nodeName());
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ private static void processImg(Element element, List<Block> blocks) {
|
|
|
+ String src = element.attr(ATTR_SRC);
|
|
|
+ if (StringUtils.isBlank(src)) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ String width = element.attr(ATTR_WIDTH);
|
|
|
+ String height = element.attr(ATTR_HEIGHT);
|
|
|
+ String style = element.attr(ATTR_STYLE);
|
|
|
+ if (StringUtils.isBlank(width)) {
|
|
|
+ width = extractStyle(style, ATTR_WIDTH);
|
|
|
+ }
|
|
|
+ if (StringUtils.isBlank(height)) {
|
|
|
+ height = extractStyle(style, ATTR_HEIGHT);
|
|
|
+ }
|
|
|
+
|
|
|
+ Param param = new Param();
|
|
|
+ param.setWidth(extractNumber(width));
|
|
|
+ param.setHeight(extractNumber(height));
|
|
|
+
|
|
|
+ Block block = new Block();
|
|
|
+ block.setType(BlockType.image);
|
|
|
+ block.setValue(src);
|
|
|
+ block.setParam(param);
|
|
|
+ blocks.add(block);
|
|
|
+ }
|
|
|
+
|
|
|
+ private static void processAudio(Element element, List<Block> blocks) {
|
|
|
+ String id = element.attr(ATTR_ID);
|
|
|
+ String name = element.attr(ATTR_NAME);
|
|
|
+ if (StringUtils.isBlank(id) || StringUtils.isBlank(name) || !name.endsWith(AUDIO_SUFFIX)) {
|
|
|
+ // 按文本处理
|
|
|
+ // processText(element.text(), null, blocks);
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ Block block = new Block();
|
|
|
+ block.setType(BlockType.audio);
|
|
|
+ block.setValue(id + AUDIO_SUFFIX);
|
|
|
+ blocks.add(block);
|
|
|
+ }
|
|
|
+
|
|
|
+ private static void processText(String value, Param param, List<Block> blocks) {
|
|
|
+ if (StringUtils.isBlank(value)) {
|
|
|
+ return;
|
|
|
+ }
|
|
|
+
|
|
|
+ Block block = new Block();
|
|
|
+ block.setType(BlockType.text);
|
|
|
+ block.setValue(value);
|
|
|
+ block.setParam(param);
|
|
|
+ blocks.add(block);
|
|
|
+ }
|
|
|
+
|
|
|
+ private static String extractStyle(String style, String attributeName) {
|
|
|
+ if (StringUtils.isBlank(style)) {
|
|
|
+ return "";
|
|
|
+ }
|
|
|
+
|
|
|
+ style = style.toLowerCase();
|
|
|
+ if (!style.contains(attributeName)) {
|
|
|
+ return "";
|
|
|
+ }
|
|
|
+
|
|
|
+ // 截取样式内属性值
|
|
|
+ String[] attributes = style.split(";");
|
|
|
+ for (String attribute : attributes) {
|
|
|
+ if (attribute.trim().startsWith(attributeName)) {
|
|
|
+ return attribute.replaceFirst(attributeName, "")
|
|
|
+ .replaceFirst(":", "")
|
|
|
+ .trim();
|
|
|
+ }
|
|
|
+ }
|
|
|
+
|
|
|
+ return "";
|
|
|
+ }
|
|
|
+
|
|
|
+ private static String extractNumber(String str) {
|
|
|
+ if (StringUtils.isBlank(str)) {
|
|
|
+ return "";
|
|
|
+ }
|
|
|
+
|
|
|
+ // 截取数字
|
|
|
+ Pattern pattern = Pattern.compile("\\d+");
|
|
|
+ Matcher matcher = pattern.matcher(str);
|
|
|
+ while (matcher.find()) {
|
|
|
+ return matcher.group(0);
|
|
|
+ }
|
|
|
+ return "";
|
|
|
+ }
|
|
|
+
|
|
|
+ private static String replaceTags(String str) {
|
|
|
+ str = str.replace(" ", "");//消除空格
|
|
|
+ str = str.replace(""", "\"");//将"转换成"
|
|
|
+ str = str.replace("<", "<");//将<转换成<
|
|
|
+ str = str.replace(">", ">");//将>转换成>
|
|
|
+ str = str.replace("&", "&");//将&转换成&
|
|
|
+ return str;
|
|
|
+ }
|
|
|
+
|
|
|
+}
|