|
@@ -1,276 +1,300 @@
|
|
|
package com.qmth.cqb.utils;
|
|
|
|
|
|
+import java.util.ArrayList;
|
|
|
+import java.util.Arrays;
|
|
|
+import java.util.HashMap;
|
|
|
+import java.util.HashSet;
|
|
|
+import java.util.Iterator;
|
|
|
+import java.util.List;
|
|
|
+import java.util.Map;
|
|
|
+import java.util.Set;
|
|
|
+
|
|
|
import org.ansj.domain.Result;
|
|
|
import org.ansj.domain.Term;
|
|
|
import org.ansj.splitWord.analysis.ToAnalysis;
|
|
|
-
|
|
|
-import java.util.*;
|
|
|
+import org.apache.commons.lang3.StringUtils;
|
|
|
|
|
|
/**
|
|
|
* 计算相似度工具包:
|
|
|
+ *
|
|
|
* @author songyue
|
|
|
* @date 2016-05-11
|
|
|
*/
|
|
|
public class StringSimilarityUtils {
|
|
|
|
|
|
- /**
|
|
|
- * 对输入字符串分词
|
|
|
- * @param str
|
|
|
- * @return ArrayList
|
|
|
- * @author songyue
|
|
|
- */
|
|
|
- public static List<String> segmentText(String str) {
|
|
|
- List<String> segResult = new ArrayList<String>();// 分词结果
|
|
|
- Result result = ToAnalysis.parse(str);
|
|
|
- List<Term> terms = result.getTerms();
|
|
|
- for(Term term:terms){
|
|
|
- segResult.add(term.getName());
|
|
|
- }
|
|
|
- return segResult;
|
|
|
- }
|
|
|
+ /**
|
|
|
+ * 对输入字符串分词
|
|
|
+ *
|
|
|
+ * @param str
|
|
|
+     * @return List of the term names produced by ToAnalysis.parse, in analyzer order
|
|
|
+ * @author songyue
|
|
|
+ */
|
|
|
+ public static List<String> segmentText(String str) {
|
|
|
+ List<String> segResult = new ArrayList<String>();// 分词结果
|
|
|
+ Result result = ToAnalysis.parse(str);
|
|
|
+ List<Term> terms = result.getTerms();
|
|
|
+ for (Term term : terms) {
|
|
|
+ segResult.add(term.getName());
|
|
|
+ }
|
|
|
+ return segResult;
|
|
|
+ }
|
|
|
+
|
|
|
+ /**
|
|
|
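+     * Computes the similarity of two strings: each is filtered and segmented into terms, then the cosine similarity of the term counts is taken.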
|
|
|
+ *
|
|
|
+ * @param seg1
|
|
|
+ * @param seg2
|
|
|
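+     * @return cosine similarity of the two term-count vectors (dot product divided by the product of their norms)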
|
|
|
+ */
|
|
|
+ public static double getSimilarityWithCosinesBySeg(String seg1, String seg2) {
|
|
|
+ double similarity = 0;
|
|
|
+ int size1 = 0;
|
|
|
+ int size2 = 0;
|
|
|
+ seg1 = stringFilter(seg1);
|
|
|
+ seg2 = stringFilter(seg2);
|
|
|
+ List<String> w1 = segmentText(seg1);
|
|
|
+ List<String> w2 = segmentText(seg2);
|
|
|
+ if (w1 != null && (size1 = w1.size()) != 0 && w2 != null && (size2 = w2.size()) != 0) {
|
|
|
+ Map<String, int[]> countMap = new HashMap<String, int[]>();
|
|
|
+ String index = null;
|
|
|
+            // Tally how often each term occurs in w1 and in w2 into countMap
|
|
|
+ for (int i = 0; i < size1; i++) {
|
|
|
+ index = w1.get(i);
|
|
|
+ if (index != null) {
|
|
|
+ int[] c = countMap.get(index);
|
|
|
+ if (c != null && c.length == 2) {
|
|
|
+ c[0]++;
|
|
|
+ } else {
|
|
|
+ c = new int[2];
|
|
|
+ c[0] = 1;
|
|
|
+ c[1] = 0;
|
|
|
+ countMap.put(index, c);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ for (int i = 0; i < size2; i++) {
|
|
|
+ index = w2.get(i);
|
|
|
+ if (index != null) {
|
|
|
+ int[] c = countMap.get(index);
|
|
|
+ if (c != null && c.length == 2) {
|
|
|
+ c[1]++;
|
|
|
+ } else {
|
|
|
+ c = new int[2];
|
|
|
+ c[0] = 0;
|
|
|
+ c[1] = 1;
|
|
|
+ countMap.put(index, c);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 根据余弦定理计算相似度
|
|
|
+ Iterator<String> it = countMap.keySet().iterator();
|
|
|
+ double sum = 0;
|
|
|
+ double s1 = 0;
|
|
|
+ double s2 = 0;
|
|
|
+ while (it.hasNext()) {
|
|
|
+ int[] c = countMap.get(it.next());
|
|
|
+ sum += c[0] * c[1];
|
|
|
+ s1 += c[0] * c[0];
|
|
|
+ s2 += c[1] * c[1];
|
|
|
+ }
|
|
|
+ similarity = sum / Math.sqrt(s1 * s2);
|
|
|
+ } else {
|
|
|
+ throw new NullPointerException("传入的参数为空");
|
|
|
+ }
|
|
|
+ return similarity;
|
|
|
+ }
|
|
|
|
|
|
- /**
|
|
|
- * 计算相似度(两个分词集合,分词匹配,算法为余弦定理)
|
|
|
- * @param seg1
|
|
|
- * @param seg2
|
|
|
- * @return
|
|
|
- */
|
|
|
- public static double getSimilarityWithCosinesBySeg(String seg1, String seg2) {
|
|
|
- double similarity = 0;
|
|
|
- int size1 = 0;
|
|
|
- int size2 = 0;
|
|
|
- seg1 = stringFilter(seg1);
|
|
|
- seg2 = stringFilter(seg2);
|
|
|
- List<String> w1 = segmentText(seg1);
|
|
|
- List<String> w2 = segmentText(seg2);
|
|
|
- if (w1 != null && (size1 = w1.size()) != 0 && w2 != null && (size2 = w2.size()) != 0) {
|
|
|
- Map<String, int[]> countMap = new HashMap<String, int[]>();
|
|
|
- String index = null;
|
|
|
- // 将w1与w2分词出现频次统计入coutMap中
|
|
|
- for (int i = 0; i < size1; i++) {
|
|
|
- index = w1.get(i);
|
|
|
- if (index != null) {
|
|
|
- int[] c = countMap.get(index);
|
|
|
- if (c != null && c.length == 2) {
|
|
|
- c[0]++;
|
|
|
- } else {
|
|
|
- c = new int[2];
|
|
|
- c[0] = 1;
|
|
|
- c[1] = 0;
|
|
|
- countMap.put(index, c);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- for (int i = 0; i < size2; i++) {
|
|
|
- index = w2.get(i);
|
|
|
- if (index != null) {
|
|
|
- int[] c = countMap.get(index);
|
|
|
- if (c != null && c.length == 2) {
|
|
|
- c[1]++;
|
|
|
- } else {
|
|
|
- c = new int[2];
|
|
|
- c[0] = 0;
|
|
|
- c[1] = 1;
|
|
|
- countMap.put(index, c);
|
|
|
- }
|
|
|
- }
|
|
|
- }
|
|
|
- // 根据余弦定理计算相似度
|
|
|
- Iterator<String> it = countMap.keySet().iterator();
|
|
|
- double sum = 0;
|
|
|
- double s1 = 0;
|
|
|
- double s2 = 0;
|
|
|
- while (it.hasNext()) {
|
|
|
- int[] c = countMap.get(it.next());
|
|
|
- sum += c[0] * c[1];
|
|
|
- s1 += c[0] * c[0];
|
|
|
- s2 += c[1] * c[1];
|
|
|
- }
|
|
|
- similarity = sum / Math.sqrt(s1 * s2);
|
|
|
- } else {
|
|
|
- throw new NullPointerException("传入的参数为空");
|
|
|
- }
|
|
|
- return similarity;
|
|
|
- }
|
|
|
+ /**
|
|
|
+     * Computes the similarity of two strings as the cosine similarity of their per-character counts (after filtering).
|
|
|
+ *
|
|
|
+ * @param w1
|
|
|
+ * @param w2
|
|
|
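+     * @return cosine similarity of the two character-count vectors (dot product divided by the product of their norms)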
|
|
|
+ */
|
|
|
+ public static double getSimilarityWithCosinesByWords(String w1, String w2) {
|
|
|
+ double similarity = 0;
|
|
|
+ int size1 = 0;
|
|
|
+ int size2 = 0;
|
|
|
+ w1 = stringFilter(w1);
|
|
|
+ w2 = stringFilter(w2);
|
|
|
+ if (w1 != null && (size1 = w1.length()) != 0 && w2 != null && (size2 = w2.length()) != 0) {
|
|
|
+ Map<Character, int[]> countMap = new HashMap<Character, int[]>();
|
|
|
+ char index;
|
|
|
+ // 将w1与w2所有字符出现频次统计入countMap中
|
|
|
+ for (int i = 0; i < size1; i++) {
|
|
|
+ index = w1.charAt(i);
|
|
|
+ int[] c = countMap.get(index);
|
|
|
+ if (c != null && c.length == 2) {
|
|
|
+ c[0]++;
|
|
|
+ } else {
|
|
|
+ c = new int[2];
|
|
|
+ c[0] = 1;
|
|
|
+ c[1] = 0;
|
|
|
+ countMap.put(index, c);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ for (int i = 0; i < size2; i++) {
|
|
|
+ index = w2.charAt(i);
|
|
|
+ int[] c = countMap.get(index);
|
|
|
+ if (c != null && c.length == 2) {
|
|
|
+ c[1]++;
|
|
|
+ } else {
|
|
|
+ c = new int[2];
|
|
|
+ c[0] = 0;
|
|
|
+ c[1] = 1;
|
|
|
+ countMap.put(index, c);
|
|
|
+ }
|
|
|
+ }
|
|
|
+ // 根据余弦定理计算相似度
|
|
|
+ Iterator<Character> it = countMap.keySet().iterator();
|
|
|
+ double sum = 0;
|
|
|
+ double s1 = 0;
|
|
|
+ double s2 = 0;
|
|
|
+ while (it.hasNext()) {
|
|
|
+ int[] c = countMap.get(it.next());
|
|
|
+ sum += c[0] * c[1];
|
|
|
+ s1 += c[0] * c[0];
|
|
|
+ s2 += c[1] * c[1];
|
|
|
+ }
|
|
|
+ similarity = sum / Math.sqrt(s1 * s2);
|
|
|
+ } else {
|
|
|
+ throw new NullPointerException("传入的参数为空");
|
|
|
+ }
|
|
|
+ return similarity;
|
|
|
+ }
|
|
|
|
|
|
- /**
|
|
|
- * 计算相似度(两个字符串,全字匹配,算法为余弦定理)
|
|
|
- * @param w1
|
|
|
- * @param w2
|
|
|
- * @return
|
|
|
- */
|
|
|
- public static double getSimilarityWithCosinesByWords(String w1, String w2) {
|
|
|
- double similarity = 0;
|
|
|
- int size1 = 0;
|
|
|
- int size2 = 0;
|
|
|
- w1 = stringFilter(w1);
|
|
|
- w2 = stringFilter(w2);
|
|
|
- if (w1 != null && (size1 = w1.length()) != 0 && w2 != null && (size2 = w2.length()) != 0) {
|
|
|
- Map<Character, int[]> countMap = new HashMap<Character, int[]>();
|
|
|
- char index;
|
|
|
- // 将w1与w2所有字符出现频次统计入countMap中
|
|
|
- for (int i = 0; i < size1; i++) {
|
|
|
- index = w1.charAt(i);
|
|
|
- int[] c = countMap.get(index);
|
|
|
- if (c != null && c.length == 2) {
|
|
|
- c[0]++;
|
|
|
- } else {
|
|
|
- c = new int[2];
|
|
|
- c[0] = 1;
|
|
|
- c[1] = 0;
|
|
|
- countMap.put(index, c);
|
|
|
- }
|
|
|
- }
|
|
|
- for (int i = 0; i < size2; i++) {
|
|
|
- index = w2.charAt(i);
|
|
|
- int[] c = countMap.get(index);
|
|
|
- if (c != null && c.length == 2) {
|
|
|
- c[1]++;
|
|
|
- } else {
|
|
|
- c = new int[2];
|
|
|
- c[0] = 0;
|
|
|
- c[1] = 1;
|
|
|
- countMap.put(index, c);
|
|
|
- }
|
|
|
- }
|
|
|
- // 根据余弦定理计算相似度
|
|
|
- Iterator<Character> it = countMap.keySet().iterator();
|
|
|
- double sum = 0;
|
|
|
- double s1 = 0;
|
|
|
- double s2 = 0;
|
|
|
- while (it.hasNext()) {
|
|
|
- int[] c = countMap.get(it.next());
|
|
|
- sum += c[0] * c[1];
|
|
|
- s1 += c[0] * c[0];
|
|
|
- s2 += c[1] * c[1];
|
|
|
- }
|
|
|
- similarity = sum / Math.sqrt(s1 * s2);
|
|
|
- } else {
|
|
|
- throw new NullPointerException("传入的参数为空");
|
|
|
- }
|
|
|
- return similarity;
|
|
|
- }
|
|
|
+ /**
|
|
|
+ * 计算相似度(两个字符串,采用优化Dice算法)
|
|
|
+ *
|
|
|
+ * @param w1
|
|
|
+ * @param w2
|
|
|
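+     * @return 2 * matched adjacent-character pairs / (pairs in w1 + pairs in w2); 0 if either input is null or empty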
|
|
|
+ */
|
|
|
+ public static double getSimilarityWithDiceOptByWords(String w1, String w2) {
|
|
|
+ if (w1 == null || w2 == null || w1.length() == 0 || w2.length() == 0)
|
|
|
+ return 0;
|
|
|
+ if (w1 == w2)
|
|
|
+ return 1;
|
|
|
+ if (w1.length() == 1 || w2.length() == 1) {
|
|
|
+ if (w1.equals(w2)) {
|
|
|
+ return 1;
|
|
|
+ } else {
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ w1 = stringFilter(w1);
|
|
|
+ w2 = stringFilter(w2);
|
|
|
+ final int n = w1.length() - 1;
|
|
|
+ final int[] sPairs = new int[n];
|
|
|
+ for (int i = 0; i <= n; i++)
|
|
|
+ if (i == 0)
|
|
|
+ sPairs[i] = w1.charAt(i) << 16;
|
|
|
+ else if (i == n)
|
|
|
+ sPairs[i - 1] |= w1.charAt(i);
|
|
|
+ else
|
|
|
+ sPairs[i] = (sPairs[i - 1] |= w1.charAt(i)) << 16;
|
|
|
|
|
|
- /**
|
|
|
- * 计算相似度(两个字符串,采用优化Dice算法)
|
|
|
- * @param w1
|
|
|
- * @param w2
|
|
|
- * @return
|
|
|
- */
|
|
|
- public static double getSimilarityWithDiceOptByWords(String w1, String w2) {
|
|
|
- if (w1 == null || w2 == null || w1.length() == 0 || w2.length() == 0)
|
|
|
- return 0;
|
|
|
- if (w1 == w2)
|
|
|
- return 1;
|
|
|
- if (w1.length() == 1 || w2.length() == 1){
|
|
|
- if (w1.equals(w2)) {
|
|
|
- return 1;
|
|
|
- } else {
|
|
|
- return 0;
|
|
|
- }
|
|
|
- }
|
|
|
- w1 = stringFilter(w1);
|
|
|
- w2 = stringFilter(w2);
|
|
|
- final int n = w1.length() - 1;
|
|
|
- final int[] sPairs = new int[n];
|
|
|
- for (int i = 0; i <= n; i++)
|
|
|
- if (i == 0)
|
|
|
- sPairs[i] = w1.charAt(i) << 16;
|
|
|
- else if (i == n)
|
|
|
- sPairs[i - 1] |= w1.charAt(i);
|
|
|
- else
|
|
|
- sPairs[i] = (sPairs[i - 1] |= w1.charAt(i)) << 16;
|
|
|
+ final int m = w2.length() - 1;
|
|
|
+ final int[] tPairs = new int[m];
|
|
|
+ for (int i = 0; i <= m; i++)
|
|
|
+ if (i == 0)
|
|
|
+ tPairs[i] = w2.charAt(i) << 16;
|
|
|
+ else if (i == m)
|
|
|
+ tPairs[i - 1] |= w2.charAt(i);
|
|
|
+ else
|
|
|
+ tPairs[i] = (tPairs[i - 1] |= w2.charAt(i)) << 16;
|
|
|
|
|
|
- final int m = w2.length() - 1;
|
|
|
- final int[] tPairs = new int[m];
|
|
|
- for (int i = 0; i <= m; i++)
|
|
|
- if (i == 0)
|
|
|
- tPairs[i] = w2.charAt(i) << 16;
|
|
|
- else if (i == m)
|
|
|
- tPairs[i - 1] |= w2.charAt(i);
|
|
|
- else
|
|
|
- tPairs[i] = (tPairs[i - 1] |= w2.charAt(i)) << 16;
|
|
|
+ Arrays.sort(sPairs);
|
|
|
+ Arrays.sort(tPairs);
|
|
|
|
|
|
- Arrays.sort(sPairs);
|
|
|
- Arrays.sort(tPairs);
|
|
|
+ int matches = 0, i = 0, j = 0;
|
|
|
+ while (i < n && j < m) {
|
|
|
+ if (sPairs[i] == tPairs[j]) {
|
|
|
+ matches += 2;
|
|
|
+ i++;
|
|
|
+ j++;
|
|
|
+ } else if (sPairs[i] < tPairs[j])
|
|
|
+ i++;
|
|
|
+ else
|
|
|
+ j++;
|
|
|
+ }
|
|
|
+ return (double) matches / (n + m);
|
|
|
+ }
|
|
|
|
|
|
- int matches = 0, i = 0, j = 0;
|
|
|
- while (i < n && j < m) {
|
|
|
- if (sPairs[i] == tPairs[j]) {
|
|
|
- matches += 2;
|
|
|
- i++;
|
|
|
- j++;
|
|
|
- } else if (sPairs[i] < tPairs[j])
|
|
|
- i++;
|
|
|
- else
|
|
|
- j++;
|
|
|
- }
|
|
|
- return (double) matches / (n + m);
|
|
|
- }
|
|
|
+ /**
|
|
|
+ * 计算相似度(两个字符串,采用一般Dice算法)
|
|
|
+ *
|
|
|
+ * @param w1
|
|
|
+ * @param w2
|
|
|
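+     * @return 2 * shared distinct character pairs / (distinct pairs in w1 + distinct pairs in w2); 0 if either input is null or empty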
|
|
|
+ */
|
|
|
+ public static double getSimilarityWithDiceByWords(String w1, String w2) {
|
|
|
+ double similarity = 0;
|
|
|
+ if (w1 != null && w1.length() != 0 && w2 != null && w2.length() != 0) {
|
|
|
+ if (w1.length() == 1 || w2.length() == 1) {
|
|
|
+ if (w1.equals(w2)) {
|
|
|
+ return 1;
|
|
|
+ } else {
|
|
|
+ return 0;
|
|
|
+ }
|
|
|
+ }
|
|
|
+ w1 = stringFilter(w1);
|
|
|
+ w2 = stringFilter(w2);
|
|
|
+ Set<String> nx = new HashSet<String>();
|
|
|
+ Set<String> ny = new HashSet<String>();
|
|
|
|
|
|
- /**
|
|
|
- * 计算相似度(两个字符串,采用一般Dice算法)
|
|
|
- * @param w1
|
|
|
- * @param w2
|
|
|
- * @return
|
|
|
- */
|
|
|
- public static double getSimilarityWithDiceByWords(String w1, String w2) {
|
|
|
- double similarity = 0;
|
|
|
- if (w1 != null && w1.length() != 0 && w2 != null && w2.length() != 0) {
|
|
|
- if (w1.length() == 1 || w2.length() == 1){
|
|
|
- if (w1.equals(w2)) {
|
|
|
- return 1;
|
|
|
- } else {
|
|
|
- return 0;
|
|
|
- }
|
|
|
- }
|
|
|
- w1 = stringFilter(w1);
|
|
|
- w2 = stringFilter(w2);
|
|
|
- Set<String> nx = new HashSet<String>();
|
|
|
- Set<String> ny = new HashSet<String>();
|
|
|
+ for (int i = 0; i < w1.length() - 1; i++) {
|
|
|
+ char x1 = w1.charAt(i);
|
|
|
+ char x2 = w1.charAt(i + 1);
|
|
|
+ String tmp = "" + x1 + x2;
|
|
|
+ nx.add(tmp);
|
|
|
+ }
|
|
|
+ for (int j = 0; j < w2.length() - 1; j++) {
|
|
|
+ char y1 = w2.charAt(j);
|
|
|
+ char y2 = w2.charAt(j + 1);
|
|
|
+ String tmp = "" + y1 + y2;
|
|
|
+ ny.add(tmp);
|
|
|
+ }
|
|
|
+ Set<String> intersection = new HashSet<String>(nx);
|
|
|
+ intersection.retainAll(ny);
|
|
|
+ double totcombigrams = intersection.size();
|
|
|
+ similarity = (2 * totcombigrams) / (nx.size() + ny.size());
|
|
|
+ }
|
|
|
+ return similarity;
|
|
|
+ }
|
|
|
|
|
|
- for (int i = 0; i < w1.length() - 1; i++) {
|
|
|
- char x1 = w1.charAt(i);
|
|
|
- char x2 = w1.charAt(i + 1);
|
|
|
- String tmp = "" + x1 + x2;
|
|
|
- nx.add(tmp);
|
|
|
- }
|
|
|
- for (int j = 0; j < w2.length() - 1; j++) {
|
|
|
- char y1 = w2.charAt(j);
|
|
|
- char y2 = w2.charAt(j + 1);
|
|
|
- String tmp = "" + y1 + y2;
|
|
|
- ny.add(tmp);
|
|
|
- }
|
|
|
- Set<String> intersection = new HashSet<String>(nx);
|
|
|
- intersection.retainAll(ny);
|
|
|
- double totcombigrams = intersection.size();
|
|
|
- similarity = (2 * totcombigrams) / (nx.size() + ny.size());
|
|
|
- }
|
|
|
- return similarity;
|
|
|
- }
|
|
|
+ /**
|
|
|
+ * 过滤特殊字符
|
|
|
+ *
|
|
|
+ * @param str
|
|
|
+     * @return str with all whitespace and the listed punctuation characters removed, or null if str is blank
|
|
|
+ */
|
|
|
+ public static String stringFilter(String str) {
|
|
|
+ if (StringUtils.isNotBlank(str)) {
|
|
|
+ String regEx = "[_`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?]";
|
|
|
+ return str.replaceAll("\\s*", "").replaceAll(regEx, "");
|
|
|
+ } else {
|
|
|
+ return null;
|
|
|
+ }
|
|
|
|
|
|
- /**
|
|
|
- * 过滤特殊字符
|
|
|
- * @param str
|
|
|
- * @return
|
|
|
- */
|
|
|
- public static String stringFilter(String str) {
|
|
|
- String regEx = "[_`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?]";
|
|
|
- return str.replaceAll("\\s*", "").replaceAll(regEx, "");
|
|
|
- }
|
|
|
+ }
|
|
|
|
|
|
- public static void main(String[] args) {
|
|
|
- String str1 = "秦汉以 来的公文程式构 成有 :::::\n <><>_________________ !!!!!";
|
|
|
- String str2 = "明清以来的公文程式构成有";
|
|
|
-// System.out.println(StringSimilarityUtils.stringFilter(str1));
|
|
|
-// System.out.println(StringSimilarityUtils.stringFilter(str2));
|
|
|
-// //double similarity1 = StringSimilarityUtils.getSimilarityWithCosinesBySeg(str1, str2);
|
|
|
-// double similarity_cos = StringSimilarityUtils.getSimilarityWithCosinesByWords(str1, str2);
|
|
|
-// double similarity_dice = StringSimilarityUtils.getSimilarityWithDiceByWords(str1, str2);
|
|
|
-// double similarity_diceopt = StringSimilarityUtils.getSimilarityWithDiceOptByWords(str1, str2);
|
|
|
-// System.out.println(similarity_cos);
|
|
|
-// System.out.println(similarity_dice);
|
|
|
-// System.out.println(similarity_diceopt);
|
|
|
- System.out.println(segmentText(str2));
|
|
|
- }
|
|
|
+ public static void main(String[] args) {
|
|
|
+ String str1 = "秦汉以 来的公文程式构 成有 :::::\n <><>_________________ !!!!!";
|
|
|
+ String str2 = "明清以来的公文程式构成有";
|
|
|
+ // System.out.println(StringSimilarityUtils.stringFilter(str1));
|
|
|
+ // System.out.println(StringSimilarityUtils.stringFilter(str2));
|
|
|
+ // //double similarity1 =
|
|
|
+ // StringSimilarityUtils.getSimilarityWithCosinesBySeg(str1, str2);
|
|
|
+ // double similarity_cos =
|
|
|
+ // StringSimilarityUtils.getSimilarityWithCosinesByWords(str1, str2);
|
|
|
+ // double similarity_dice =
|
|
|
+ // StringSimilarityUtils.getSimilarityWithDiceByWords(str1, str2);
|
|
|
+ // double similarity_diceopt =
|
|
|
+ // StringSimilarityUtils.getSimilarityWithDiceOptByWords(str1, str2);
|
|
|
+ // System.out.println(similarity_cos);
|
|
|
+ // System.out.println(similarity_dice);
|
|
|
+ // System.out.println(similarity_diceopt);
|
|
|
+ System.out.println(segmentText(str2));
|
|
|
+ }
|
|
|
}
|