浏览代码

重构部分,增加查重工具类

宋悦 8 年之前
父节点
当前提交
24b3925fc7

+ 6 - 0
cqb-comm-utils/pom.xml

@@ -183,5 +183,11 @@
             <version>1.3.2</version>
         </dependency>
 
+        <dependency>
+            <groupId>org.ansj</groupId>
+            <artifactId>ansj_seg</artifactId>
+            <version>5.0.1</version>
+        </dependency>
+
     </dependencies>
 </project>

+ 276 - 0
cqb-comm-utils/src/main/java/com/qmth/cqb/utils/StringSimilarityUtils.java

@@ -0,0 +1,276 @@
+package com.qmth.cqb.utils;
+
+import org.ansj.domain.Result;
+import org.ansj.domain.Term;
+import org.ansj.splitWord.analysis.ToAnalysis;
+
+import java.util.*;
+
+/**
+ * Text-similarity helpers used for duplicate detection.
+ *
+ * <p>Three families of measures are offered: cosine similarity over word
+ * segments, cosine similarity over single characters, and the Dice
+ * coefficient over character bigrams (multiset and set variants). Every
+ * measure first normalises its inputs with {@link #stringFilter(String)}.
+ *
+ * @author songyue
+ * @date 2016-05-11
+ */
+public class StringSimilarityUtils {
+
+	/** Message carried by the exception the cosine methods throw on null or empty input. */
+	private static final String EMPTY_ARGUMENT_MESSAGE = "传入的参数为空";
+
+	/** Matches runs of whitespace; compiled once because filtering runs on every comparison. */
+	private static final java.util.regex.Pattern WHITESPACE =
+			java.util.regex.Pattern.compile("\\s*");
+
+	/** Matches a single ASCII or CJK punctuation/special character that is dropped before comparison. */
+	private static final java.util.regex.Pattern SPECIAL_CHARS =
+			java.util.regex.Pattern.compile("[_`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?]");
+
+	/**
+	 * Splits the input into word segments with the ansj {@code ToAnalysis} segmenter.
+	 *
+	 * @param str text to segment; may be {@code null} or empty
+	 * @return a new mutable list with the name of every term, in segmenter order;
+	 *         empty when {@code str} is {@code null} or empty
+	 * @author songyue
+	 */
+	public static List<String> segmentText(String str) {
+		if (str == null || str.isEmpty()) {
+			// Nothing to segment; avoid handing null/empty text to the segmenter.
+			return new ArrayList<String>();
+		}
+		Result result = ToAnalysis.parse(str);
+		List<Term> terms = result.getTerms();
+		List<String> segResult = new ArrayList<String>(terms.size());
+		for (Term term : terms) {
+			segResult.add(term.getName());
+		}
+		return segResult;
+	}
+
+	/**
+	 * Computes the cosine similarity of two texts over their word segments.
+	 *
+	 * <p>Both texts are filtered with {@link #stringFilter(String)}, segmented with
+	 * {@link #segmentText(String)}, and compared as term-frequency vectors.
+	 *
+	 * @param seg1 first text
+	 * @param seg2 second text
+	 * @return cosine of the angle between the two term-frequency vectors, in [0, 1]
+	 * @throws NullPointerException if either text is {@code null} or yields no
+	 *         segments after filtering (exception type kept for existing callers)
+	 */
+	public static double getSimilarityWithCosinesBySeg(String seg1, String seg2) {
+		if (seg1 == null || seg2 == null) {
+			throw new NullPointerException(EMPTY_ARGUMENT_MESSAGE);
+		}
+		List<String> w1 = segmentText(stringFilter(seg1));
+		List<String> w2 = segmentText(stringFilter(seg2));
+		if (w1.isEmpty() || w2.isEmpty()) {
+			throw new NullPointerException(EMPTY_ARGUMENT_MESSAGE);
+		}
+		// Slot 0 of each counter is the frequency in w1, slot 1 the frequency in w2.
+		Map<String, int[]> countMap = new HashMap<String, int[]>();
+		for (String term : w1) {
+			if (term != null) {
+				increment(countMap, term, 0);
+			}
+		}
+		for (String term : w2) {
+			if (term != null) {
+				increment(countMap, term, 1);
+			}
+		}
+		return cosine(countMap);
+	}
+
+	/**
+	 * Computes the cosine similarity of two strings over their individual characters.
+	 *
+	 * @param w1 first string
+	 * @param w2 second string
+	 * @return cosine of the angle between the two character-frequency vectors, in [0, 1]
+	 * @throws NullPointerException if either string is {@code null} or empty after
+	 *         filtering (exception type kept for existing callers)
+	 */
+	public static double getSimilarityWithCosinesByWords(String w1, String w2) {
+		if (w1 == null || w2 == null) {
+			throw new NullPointerException(EMPTY_ARGUMENT_MESSAGE);
+		}
+		String first = stringFilter(w1);
+		String second = stringFilter(w2);
+		if (first.isEmpty() || second.isEmpty()) {
+			throw new NullPointerException(EMPTY_ARGUMENT_MESSAGE);
+		}
+		// Slot 0 of each counter is the frequency in w1, slot 1 the frequency in w2.
+		Map<Character, int[]> countMap = new HashMap<Character, int[]>();
+		for (int i = 0; i < first.length(); i++) {
+			increment(countMap, first.charAt(i), 0);
+		}
+		for (int i = 0; i < second.length(); i++) {
+			increment(countMap, second.charAt(i), 1);
+		}
+		return cosine(countMap);
+	}
+
+	/**
+	 * Computes the Dice coefficient of two strings over character bigrams,
+	 * counting repeated bigrams (sorted-array multiset intersection).
+	 *
+	 * <p>Inputs are filtered first, so the length checks apply to the text that is
+	 * actually compared.
+	 *
+	 * @param w1 first string
+	 * @param w2 second string
+	 * @return {@code 0} if either string is {@code null} or empty after filtering;
+	 *         {@code 1} if the filtered strings are equal; {@code 0} if exactly one
+	 *         of them is a single character; otherwise
+	 *         {@code 2 * sharedBigrams / (bigrams(w1) + bigrams(w2))}
+	 */
+	public static double getSimilarityWithDiceOptByWords(String w1, String w2) {
+		if (w1 == null || w2 == null) {
+			return 0;
+		}
+		String first = stringFilter(w1);
+		String second = stringFilter(w2);
+		if (first.isEmpty() || second.isEmpty()) {
+			return 0;
+		}
+		if (first.equals(second)) {
+			return 1;
+		}
+		if (first.length() == 1 || second.length() == 1) {
+			// A single character has no bigram, and the strings are known to differ.
+			return 0;
+		}
+		int[] sPairs = packedBigrams(first);
+		int[] tPairs = packedBigrams(second);
+		Arrays.sort(sPairs);
+		Arrays.sort(tPairs);
+
+		// Merge-walk both sorted arrays; each shared bigram counts once per side.
+		int matches = 0;
+		int i = 0;
+		int j = 0;
+		while (i < sPairs.length && j < tPairs.length) {
+			if (sPairs[i] == tPairs[j]) {
+				matches += 2;
+				i++;
+				j++;
+			} else if (sPairs[i] < tPairs[j]) {
+				i++;
+			} else {
+				j++;
+			}
+		}
+		return (double) matches / (sPairs.length + tPairs.length);
+	}
+
+	/**
+	 * Computes the Dice coefficient of two strings over their sets of distinct
+	 * character bigrams.
+	 *
+	 * <p>Inputs are filtered first, so the length checks apply to the text that is
+	 * actually compared.
+	 *
+	 * @param w1 first string
+	 * @param w2 second string
+	 * @return {@code 0} if either string is {@code null} or empty after filtering;
+	 *         {@code 1} if the filtered strings are equal; {@code 0} if exactly one
+	 *         of them is a single character; otherwise
+	 *         {@code 2 * |X ∩ Y| / (|X| + |Y|)} for the bigram sets X and Y
+	 */
+	public static double getSimilarityWithDiceByWords(String w1, String w2) {
+		if (w1 == null || w2 == null) {
+			return 0;
+		}
+		String first = stringFilter(w1);
+		String second = stringFilter(w2);
+		if (first.isEmpty() || second.isEmpty()) {
+			return 0;
+		}
+		if (first.equals(second)) {
+			return 1;
+		}
+		if (first.length() == 1 || second.length() == 1) {
+			// A single character has no bigram, and the strings are known to differ.
+			return 0;
+		}
+		Set<String> nx = bigramSet(first);
+		Set<String> ny = bigramSet(second);
+		Set<String> intersection = new HashSet<String>(nx);
+		intersection.retainAll(ny);
+		// Both sets are non-empty here, so the denominator is never zero.
+		return (2.0 * intersection.size()) / (nx.size() + ny.size());
+	}
+
+	/**
+	 * Removes all whitespace and the punctuation/special characters listed in
+	 * the special-character pattern from the input.
+	 *
+	 * @param str text to normalise; must not be {@code null}
+	 * @return the text without whitespace and special characters (possibly empty)
+	 * @throws NullPointerException if {@code str} is {@code null}
+	 */
+	public static String stringFilter(String str) {
+		Objects.requireNonNull(str, "str");
+		String withoutWhitespace = WHITESPACE.matcher(str).replaceAll("");
+		return SPECIAL_CHARS.matcher(withoutWhitespace).replaceAll("");
+	}
+
+	/**
+	 * Small manual demo: prints the segmentation of a sample sentence.
+	 *
+	 * @param args unused
+	 */
+	public static void main(String[] args) {
+		String str2 = "明清以来的公文程式构成有";
+		System.out.println(segmentText(str2));
+	}
+
+	/** Adds one to slot {@code side} (0 or 1) of the counter for {@code key}, creating it if absent. */
+	private static <K> void increment(Map<K, int[]> counts, K key, int side) {
+		int[] counter = counts.get(key);
+		if (counter == null) {
+			counter = new int[2];
+			counts.put(key, counter);
+		}
+		counter[side]++;
+	}
+
+	/** Returns the cosine of the two frequency vectors held in slots 0 and 1 of the counters; 0 if either vector is all zeros. */
+	private static <K> double cosine(Map<K, int[]> counts) {
+		double dot = 0;
+		double norm1 = 0;
+		double norm2 = 0;
+		for (int[] counter : counts.values()) {
+			// Multiply as double so very large counts cannot overflow int.
+			dot += (double) counter[0] * counter[1];
+			norm1 += (double) counter[0] * counter[0];
+			norm2 += (double) counter[1] * counter[1];
+		}
+		double denominator = Math.sqrt(norm1 * norm2);
+		return denominator == 0 ? 0 : dot / denominator;
+	}
+
+	/** Packs each adjacent character pair of {@code s} (length >= 2) into one int: high 16 bits first char, low 16 bits second. */
+	private static int[] packedBigrams(String s) {
+		int[] pairs = new int[s.length() - 1];
+		for (int i = 0; i < pairs.length; i++) {
+			pairs[i] = (s.charAt(i) << 16) | s.charAt(i + 1);
+		}
+		return pairs;
+	}
+
+	/** Returns the set of distinct two-character substrings of {@code s}. */
+	private static Set<String> bigramSet(String s) {
+		Set<String> bigrams = new HashSet<String>();
+		for (int i = 0; i < s.length() - 1; i++) {
+			bigrams.add(s.substring(i, i + 2));
+		}
+		return bigrams;
+	}
+}

+ 18 - 9
cqb-comm-utils/src/main/java/com/qmth/cqb/utils/word/DocxProcessUtil.java

@@ -580,43 +580,52 @@ public final class DocxProcessUtil {
                         String tmpText = text.getValue();
                         if (quesUnit == QuesUnit.QUES_BODY) {
                             // 过滤题干标题
-                            if (tmpText.matches("^\\d{1,}\\.")) {
+                            if (tmpText.matches("^\\d{1,}\\.[\\s\\S]*")) {
                                 tmpText = tmpText.replaceFirst("\\d{1,}\\.", "");
                                 text.setValue(tmpText);
                             } else {
-                                tmpText = tmpText.replaceFirst("\\d{1,}", "").replaceFirst("\\.", "");
+                                if(index == 0){
+                                    tmpText = tmpText.replaceFirst("\\d{1,}", "");
+                                }else if(index == 1){
+                                    tmpText = tmpText.replaceFirst("\\.", "");
+                                }
                                 text.setValue(tmpText);
-                                if (index == 2)
+                                if (index == 1) {
                                     break;
+                                }
                             }
 
                         } else if (quesUnit == QuesUnit.QUES_OPTION) {
                             // 过滤选项标题
-                            if (tmpText.matches("^[A-Z]\\.")) {
+                            if (tmpText.matches("^[A-Z]\\.[\\s\\S]*")) {
                                 tmpText = tmpText.replaceFirst("[A-Z]\\.", "");
                                 text.setValue(tmpText);
                             } else {
-                                tmpText = tmpText.replaceFirst("[A-Z]", "").replaceFirst("\\.", "");
+                                if(index == 0){
+                                    tmpText = tmpText.replaceFirst("[A-Z]", "");
+                                }else if(index == 1){
+                                    tmpText = tmpText.replaceFirst("\\.", "");
+                                }
                                 text.setValue(tmpText);
-                                if (index == 2)
+                                if (index == 1) {
                                     break;
+                                }
                             }
 
                         } else if (quesUnit == QuesUnit.QUES_ANSWER) {
                             // 过滤答案标题
-                            if (index < 4) {
+                            if (index < 3) {
                                 tmpText = tmpText.replaceFirst("\\[|\\]", "").replaceFirst("答案", "");
                             } else {
                                 tmpText = tmpText.replaceFirst("[:|:]", "");
                             }
                             text.setValue(tmpText);
-                            if (index == 4)
+                            if (index == 3)
                                 break;
                         }
                     }
                 }
             }
-
         }
         return p;
     }

+ 8 - 5
cqb-paper/src/main/java/com/qmth/cqb/paper/service/ImportPaperService.java

@@ -341,18 +341,21 @@ public class ImportPaperService {
         if (StringUtils.isEmpty(importPaperCheck.quesGroup)) {
             tmpErrorInfo += "试题分类为空或格式不正确\n";
 
-        } else if (StringUtils.isEmpty(importPaperCheck.quesType)) {
+        }
+        if (StringUtils.isEmpty(importPaperCheck.quesType)) {
             tmpErrorInfo += "题型为空或格式不正确\n";
 
-        } else if (StringUtils.isEmpty(importPaperCheck.quesName)) {
+        }
+        if (StringUtils.isEmpty(importPaperCheck.quesName)) {
             tmpErrorInfo += "大题名称为空或格式不正确\n";
 
-        } else if (StringUtils.isEmpty(importPaperCheck.quesCount)) {
+        }
+        if (StringUtils.isEmpty(importPaperCheck.quesCount)) {
             tmpErrorInfo += "题目数量为空或格式不正确\n";
 
-        } else if (StringUtils.isEmpty(importPaperCheck.quesScore)) {
+        }
+        if (StringUtils.isEmpty(importPaperCheck.quesScore)) {
             tmpErrorInfo += "分数为空或格式不正确\n";
-
         }
         if (!StringUtils.isEmpty(importPaperCheck.quesType) && !CommonUtils.checkQuesType(importPaperCheck.quesType)) {
             tmpErrorInfo += "题型必须是单选、多选、判断、填空、问答、套题中的一种\n";

+ 4 - 4
cqb-paper/src/main/java/com/qmth/cqb/paper/service/PaperService.java

@@ -332,9 +332,6 @@ public class PaperService {
 	 */
 	public void formatPaper(Paper paper){
 		List<PaperDetail> paperDetails = paperDetailRepo.findByPaper(paper);
-		//计算试卷总分
-		List<PaperDetailUnit> paperDetailUnitAll = paperDetailUnitRepo.findByPaper(paper);
-		double totalScore = paperDetailUnitAll.stream().mapToDouble(PaperDetailUnit::getScore).sum();
 
 		//计算各大题总分
 		for(PaperDetail paperDetail:paperDetails){
@@ -358,7 +355,10 @@ public class PaperService {
 				paperDetail.setUnitCount(count);
 			}
 		}
-		paper.setUnitCount(paperDetailUnitAll.size());
+		//计算试卷总分
+		double totalScore = paperDetails.stream().mapToDouble(PaperDetail::getScore).sum();
+		int totalCount = paperDetails.stream().mapToInt(PaperDetail::getUnitCount).sum();
+		paper.setUnitCount(totalCount);
 		paper.setPaperDetailCount(paperDetails.size());
 		paper.setTotalScore(totalScore);
 		paperDetailRepo.save(paperDetails);

+ 4 - 6
cqb-paper/src/main/java/com/qmth/cqb/paper/web/ImportPaperController.java

@@ -13,6 +13,8 @@ import com.qmth.cqb.paper.service.ImportPaperService;
 import org.springframework.web.multipart.commons.CommonsMultipartFile;
 
 import java.io.*;
+import java.util.HashMap;
+import java.util.Map;
 
 /**
  * Created by songyue on 16/12/28.
@@ -38,15 +40,11 @@ public class ImportPaperController {
      */
     @ApiOperation(value="导入试卷",notes="导入试卷")
     @PostMapping(value = "/importPaper")
-    public ResponseEntity importPaper(@RequestParam String paperName,
+    public String importPaper(@RequestParam String paperName,
                                       @RequestParam("file") CommonsMultipartFile file){
         File tempFile = importPaperService.getUploadFile(file);
         String returnStr = importPaperService.ImportPaper(paperName,tempFile);
-        if(returnStr.equals("success")){
-            return new ResponseEntity(HttpStatus.OK);
-        }else{
-            return new ResponseEntity(returnStr,HttpStatus.INTERNAL_SERVER_ERROR);
-        }
+        return returnStr;
     }
 
 }

+ 1 - 1
cqb-starter/src/main/resources/application-test.properties

@@ -1,3 +1,3 @@
-spring.data.mongodb.uri=mongodb://localhost:27017/comm-ques-bank
+spring.data.mongodb.uri=mongodb://192.168.1.99:27017/comm-ques-bank
 spring.data.mongodb.grid-fs-database=comm-ques-bank
 eureka.client.serviceUrl.defaultZone=http://localhost:1111/eureka/