Pārlūkot izejas kodu

修复导入导出bug,优化相似度

宋悦 8 gadi atpakaļ
vecāks
revīzija
7b83b2d2f8

+ 2 - 2
cqb-comm-utils/src/main/java/com/qmth/cqb/utils/CommonUtils.java

@@ -24,8 +24,8 @@ public final class CommonUtils {
 
     private static final String[] CN_BIG_NUM = { "十", "百", "千", "万", "十", "百", "千", "亿", "十", "百", "千" };
 
-    public static final String PAPER_TITLE="山东大学网络教育";
-    public static final String PAPER_SUB_TITLE="2016-2017学年第一学期期末考试";
+    public static final String PAPER_TITLE="中国石油大学";
+    public static final String PAPER_SUB_TITLE="网络教育";
 
     /**
      * 加载properties配置文件

+ 8 - 5
cqb-comm-utils/src/main/java/com/qmth/cqb/utils/StringSimilarityUtils.java

@@ -12,6 +12,7 @@ import java.util.Set;
 import org.ansj.domain.Result;
 import org.ansj.domain.Term;
 import org.ansj.splitWord.analysis.ToAnalysis;
+import org.apache.commons.lang3.StringUtils;
 
 /**
  * 计算相似度工具包:
@@ -33,7 +34,9 @@ public class StringSimilarityUtils {
         Result result = ToAnalysis.parse(str);
         List<Term> terms = result.getTerms();
         for (Term term : terms) {
-            segResult.add(term.getName());
+            if(StringUtils.isNotEmpty(term.getName().trim())){
+                segResult.add(term.getName());
+            }
         }
         return segResult;
     }
@@ -270,13 +273,13 @@ public class StringSimilarityUtils {
      */
     public static String stringFilter(String str) {
         String regEx = "[_`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?]";
-        return str.replaceAll("\\s*", "").replaceAll(regEx, "");
+        return str.replaceAll(regEx, "").trim();
 
     }
 
     public static void main(String[] args) {
-        String str1 = "秦汉以    来的公文程式构   成有  :::::\n    <><>_________________ !!!!!";
-        String str2 = "明清以来的公文程式构成有";
+        String str1 = "秦汉以i am filter he is hehe abc来的公文程式构成有\n<><>_________________ !!!!!";
+        String str2 = "More roads than one lead to the mountain village.";
         // System.out.println(StringSimilarityUtils.stringFilter(str1));
         // System.out.println(StringSimilarityUtils.stringFilter(str2));
         // //double similarity1 =
@@ -290,6 +293,6 @@ public class StringSimilarityUtils {
         // System.out.println(similarity_cos);
         // System.out.println(similarity_dice);
         // System.out.println(similarity_diceopt);
-        System.out.println(segmentText(str2));
+        System.out.println(segmentText(stringFilter(str1)));
     }
 }

+ 1 - 1
cqb-comm-utils/src/main/java/com/qmth/cqb/utils/word/DocxProcessUtil.java

@@ -121,7 +121,7 @@ public final class DocxProcessUtil {
         List<Object> tList = getAllElementFromObject(p, Text.class);
         for (Object obj : tList) {
             Text text = (Text) obj;
-            returnText += text.getValue();
+            returnText += text.getValue().trim();
         }
         return returnText.trim();
     }

+ 49 - 21
cqb-paper/src/main/java/com/qmth/cqb/paper/service/ImportPaperService.java

@@ -8,8 +8,6 @@ import java.util.HashMap;
 import java.util.List;
 import java.util.Map;
 import java.util.UUID;
-import java.util.regex.Matcher;
-import java.util.regex.Pattern;
 
 import org.apache.commons.io.FileUtils;
 import org.apache.commons.io.IOUtils;
@@ -232,7 +230,7 @@ public class ImportPaperService {
                 // 跳过空白段落
                 continue;
 
-            } else if (pText.startsWith("[试题分类]")) {
+            } else if (isQuesHeader(pText)) {
                 // 处理大题头信息
                 processMainQuesHeader(pList, importPaperCheck.index, importPaperCheck);
 
@@ -250,6 +248,10 @@ public class ImportPaperService {
             } else if (pText.matches("^\\d{1,}\\.[\\s\\S]*")
                     || (isNested(importPaperCheck) && !pText.startsWith("["))) {
 
+                if(paperDetail == null){
+                    throw new PaperException("导入文件格式有误,必须有大题头信息,且以 [ 开头!");
+                }
+
                 ++subQuesNum;
 
                 // 处理试题
@@ -265,7 +267,7 @@ public class ImportPaperService {
                 if (importPaperCheck.quesType.equals("单选") || importPaperCheck.quesType.equals("多选")) {
 
                     // 处理题干
-                    processQuesBody(pList, importPaperCheck.index, question, importPaperCheck, tmpWordMlPackage);
+                    processQuesBody(pList, importPaperCheck.index, subQuesNum, question, importPaperCheck, tmpWordMlPackage);
                     // 处理选项
                     processQuesOption(pList, importPaperCheck.index, subQuesNum, question, importPaperCheck,
                             tmpWordMlPackage);
@@ -276,11 +278,11 @@ public class ImportPaperService {
                     processSelectOption(question);
                 } else if (importPaperCheck.quesType.equals("套题")) {
                     // 处理套题
-                    processNestedQues(pList, importPaperCheck.index, question, paperDetailUnit, importPaperCheck,
+                    processNestedQues(pList, importPaperCheck.index,question, paperDetailUnit, importPaperCheck,
                             tmpWordMlPackage);
                 } else {
                     // 处理其他题型
-                    processQuesBody(pList, importPaperCheck.index, question, importPaperCheck, tmpWordMlPackage);
+                    processQuesBody(pList, importPaperCheck.index, subQuesNum,question, importPaperCheck, tmpWordMlPackage);
                     processQuesTail(pList, importPaperCheck.index, subQuesNum, question, paperDetailUnit,
                             importPaperCheck, tmpWordMlPackage, false);
                 }
@@ -295,14 +297,15 @@ public class ImportPaperService {
 
                 // 设置当前索引,防止多余循环
                 i = importPaperCheck.index - 1;
+            }else if(paperDetail == null){
+                throw new PaperException("导入文件格式有误,必须有大题头信息,且以 [ 开头!");
             }
             if (!StringUtils.isEmpty(importPaperCheck.errorInfo)) {
                 throw new PaperException(importPaperCheck.errorInfo);
             }
         }
-        if (paperDetails.size() == 0) {
-            importPaperCheck.setErrorInfo("导入文件格式有误!");
-            throw new PaperException(importPaperCheck.errorInfo);
+        if (paperDetails.size() == 0 || paperDetailUnits.size() == 0) {
+            throw new PaperException("导入文件格式有误!");
         }
         paper.setPaperDetailCount(mainQuesNum);
         // 保存导入试卷信息
@@ -316,6 +319,13 @@ public class ImportPaperService {
         return paper;
     }
 
+    public boolean isQuesHeader(String pText){
+        return pText.startsWith("[")
+                && !pText.startsWith("[答案]")
+                && !pText.startsWith("[小题分数]")
+                && !pText.startsWith("[小题型]");
+    }
+
     /**
      * 初始化试卷信息
      *
@@ -466,13 +476,13 @@ public class ImportPaperService {
      * @param wordMLPackage
      * @throws Exception
      */
-    public void processQuesBody(List pList, int index, Question question, ImportPaperCheck importPaperCheck,
+    public void processQuesBody(List pList, int index, int subQuesNum,Question question, ImportPaperCheck importPaperCheck,
             WordprocessingMLPackage wordMLPackage) throws Exception {
         // 定义题干wordml和html
         StringBuilder quesBodyWordMl = new StringBuilder();
         StringBuilder quesBodyHtml = new StringBuilder();
         int i = 0;
-
+        boolean bodyStart = false;
         for (i = index; i < pList.size(); i++) {
             P pBody = (P) pList.get(i);
             String tmpText = DocxProcessUtil.getPText(pBody);
@@ -483,6 +493,7 @@ public class ImportPaperService {
             } else if (tmpText.matches("^\\d{1,}\\.[\\s\\S]*")) {
                 // 题干第一段
                 // 过滤题干标题
+                bodyStart = true;
                 pBody = DocxProcessUtil.formatP(pBody, QuesUnit.QUES_BODY);
                 String tmpWordMl = DocxProcessUtil.getPWordMl(pBody);
                 quesBodyWordMl.append(DocxProcessUtil.formatPWordMl(tmpWordMl));
@@ -490,13 +501,25 @@ public class ImportPaperService {
             } else if (tmpText.matches("^[a-zA-Z]\\.[\\s\\S]*") || tmpText.startsWith("[")) {
                 // 检测到选项或其他特殊段落直接退出
                 break;
-            } else {
+            } else if(bodyStart){
                 // 题干普通段落
                 String tmpWordMl = DocxProcessUtil.getPWordMl(pBody);
                 quesBodyWordMl.append(DocxProcessUtil.formatPWordMl(tmpWordMl));
                 quesBodyHtml.append(DocxProcessUtil.docx2Html(tmpWordMl, wordMLPackage));
+            }else{
+                break;
             }
         }
+        if(StringUtils.isEmpty(quesBodyHtml) || StringUtils.isEmpty(quesBodyWordMl)){
+            if(subQuesNum == 0){
+                importPaperCheck.setErrorInfo(importPaperCheck.quesName
+                        +"题干为空或格式不正确");
+            }else{
+                importPaperCheck.setErrorInfo(getQuesNumInfo(importPaperCheck.quesName,subQuesNum)
+                        +"题干为空或格式不正确");
+            }
+            throw new PaperException(importPaperCheck.errorInfo);
+        }
         importPaperCheck.setIndex(i);
         question.setQuesBody(quesBodyHtml.toString());
         question.setQuesBodyWord(quesBodyWordMl.toString());
@@ -562,12 +585,15 @@ public class ImportPaperService {
                 String tmpWordMl = DocxProcessUtil.getPWordMl(pOption);
                 current.setOptionBody(current.getOptionBody() + DocxProcessUtil.docx2Html(tmpWordMl, wordMLPackage));
                 current.setOptionBodyWord(current.getOptionBodyWord() + DocxProcessUtil.formatPWordMl(tmpWordMl));
+            }else{
+                break;
             }
         }
         importPaperCheck.setIndex(i);
         if (optionCount < 2) {
-            importPaperCheck.setErrorInfo(
-                    getQuesNumInfo(importPaperCheck.quesName, subQuesNum) + "中选项格式不正确或有缺失,如没有缺失可尝试换行处理\n");
+            importPaperCheck.setErrorInfo(getQuesNumInfo(importPaperCheck.quesName, subQuesNum)
+                            + "中选项格式不正确或有缺失\n");
+            throw new PaperException(importPaperCheck.errorInfo);
         } else {
             question.setQuesOptions(quesOptions);
         }
@@ -628,6 +654,8 @@ public class ImportPaperService {
                 String tmpWordMl = DocxProcessUtil.getPWordMl(pAnswer);
                 answerWordML.append(DocxProcessUtil.formatPWordMl(tmpWordMl));
                 answerHTML.append(DocxProcessUtil.docx2Html(tmpWordMl, wordMLPackage));
+            } else {
+                break;
             }
         }
         importPaperCheck.setIndex(i);
@@ -638,6 +666,7 @@ public class ImportPaperService {
         } else {
             importPaperCheck.setErrorInfo(getQuesNumInfo(importPaperCheck.quesName, subQuesNum)
                     + "答案为空或格式不正确\n");
+            throw new PaperException(importPaperCheck.errorInfo);
         }
 
         // 设置预设分数
@@ -683,13 +712,13 @@ public class ImportPaperService {
      * @param importPaperCheck
      * @throws Exception
      */
-    public void processNestedQues(List pList, int index, Question question, PaperDetailUnit paperDetailUnit,
+    public void processNestedQues(List pList, int index,Question question, PaperDetailUnit paperDetailUnit,
             ImportPaperCheck importPaperCheck, WordprocessingMLPackage wordMLPackage) throws Exception {
         // 题型
         String nestedQuesType = "";
 
         // 设置套题题干
-        processQuesBody(pList, index, question, importPaperCheck, wordMLPackage);
+        processQuesBody(pList, index, 0,question, importPaperCheck, wordMLPackage);
 
         // 创建小题集合
         List<Question> subQuesList = new ArrayList<Question>();
@@ -721,7 +750,7 @@ public class ImportPaperService {
                 if (StringUtils.isEmpty(nestedQuesType)) {
                     importPaperCheck.setErrorInfo(getQuesNumInfo(importPaperCheck.quesName, subQuesNum)
                             + "小题型为空或格式不正确\n");
-                    return;
+                    throw new PaperException(importPaperCheck.errorInfo);
                 }
 
                 subQues = new Question();
@@ -734,7 +763,7 @@ public class ImportPaperService {
                 if (nestedQuesType.equals("单选") || nestedQuesType.equals("多选")) {
 
                     // 处理题干
-                    processQuesBody(pList, importPaperCheck.index, subQues, importPaperCheck, wordMLPackage);
+                    processQuesBody(pList, importPaperCheck.index, subQuesNum,subQues, importPaperCheck, wordMLPackage);
 
                     // 处理选项
                     processQuesOption(pList, importPaperCheck.index, subQuesNum, subQues, importPaperCheck,
@@ -749,7 +778,7 @@ public class ImportPaperService {
 
                 } else {
                     // 处理其他题型
-                    processQuesBody(pList, importPaperCheck.index, subQues, importPaperCheck, wordMLPackage);
+                    processQuesBody(pList, importPaperCheck.index, subQuesNum, subQues, importPaperCheck, wordMLPackage);
 
                     processQuesTail(pList, importPaperCheck.index, subQuesNum, subQues, paperDetailUnit,
                             importPaperCheck, wordMLPackage, true);
@@ -763,12 +792,11 @@ public class ImportPaperService {
                 if (StringUtils.isEmpty(nestedQuesType)) {
                     importPaperCheck.setErrorInfo(getQuesNumInfo(importPaperCheck.quesName, quesTypeNum)
                             + "小题型为空或格式不正确\n");
-                    return;
                 }else{
                     importPaperCheck.setErrorInfo(getQuesNumInfo(importPaperCheck.quesName, quesTypeNum)
                             + "题干为空或格式不正确,必须以数字.开头\n");
-                    return;
                 }
+                throw new PaperException(importPaperCheck.errorInfo);
 
             }else if(tmpText.startsWith("[")){
                 break;

+ 1 - 1
cqb-paper/src/main/java/com/qmth/cqb/paper/web/ImportPaperController.java

@@ -73,7 +73,7 @@ public class ImportPaperController {
             return new ResponseEntity(paper, HttpStatus.OK);
         } catch (Exception e) {
             e.printStackTrace();
-            log.info("导入异常:" + e.getMessage());
+            log.error("导入异常:" + e.getMessage());
             return new ResponseEntity(new ErrorMsg(e.getMessage()), HttpStatus.INTERNAL_SERVER_ERROR);
         } finally {
             log.info("导入结束");