|
@@ -272,13 +272,12 @@ public class StringSimilarityUtils {
|
|
|
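* @return the filtered string: HTML named entities and punctuation/symbol characters removed, with leading and trailing whitespace trimmed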
|
|
|
*/
|
|
|
public static String stringFilter(String str) {
|
|
|
- String regEx = "[_`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?]";
|
|
|
+ String regEx = "\\&[a-zA-Z]{1,10};|[_`~!@#$%^&*()+=|{}':;',\\[\\].<>/?~!@#¥%……&*()——+|{}【】‘;:”“’。,、?]";
|
|
|
return str.replaceAll(regEx, "").trim();
|
|
|
-
|
|
|
}
|
|
|
|
|
|
public static void main(String[] args) {
|
|
|
- String str1 = "秦汉以i am filter he is hehe abc来的公文程式构成有\n<><>_________________ !!!!!";
|
|
|
+ String str1 = "秦汉以i am filter he is hehe abc来的公文程式构成有\n<><>_________________ !!!!! ©<> ";
|
|
|
String str2 = "More roads than one lead to the mountain village.";
|
|
|
// System.out.println(StringSimilarityUtils.stringFilter(str1));
|
|
|
// System.out.println(StringSimilarityUtils.stringFilter(str2));
|
|
@@ -293,6 +292,7 @@ public class StringSimilarityUtils {
|
|
|
// System.out.println(similarity_cos);
|
|
|
// System.out.println(similarity_dice);
|
|
|
// System.out.println(similarity_diceopt);
|
|
|
+ System.out.println(stringFilter(str1));
|
|
|
System.out.println(segmentText(stringFilter(str1)));
|
|
|
}
|
|
|
}
|