提交 | 用户 | age
|
a6a76f
|
1 |
package com.yc.utils; |
F |
2 |
|
|
3 |
import java.util.regex.Matcher; |
|
4 |
import java.util.regex.Pattern; |
|
5 |
|
|
6 |
import org.apache.poi.util.StringUtil; |
|
7 |
/**
 * Detects text that looks like garbled ("mojibake") Chinese and, when it does,
 * attempts to repair it.
 *
 * <p>The repair re-encodes the string as GBK and decodes the resulting bytes as
 * UTF-8, i.e. it undoes the case where UTF-8 bytes were decoded as GBK.
 */
public class ChineseUtill {

    /** Runs of whitespace; compiled once instead of on every call. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Any character in a Unicode punctuation category. */
    private static final Pattern PUNCTUATION = Pattern.compile("\\p{P}");

    /** Share of non-Chinese characters above which a text is reported as garbled. */
    private static final double MESSY_RATIO_THRESHOLD = 0.4;

    /**
     * Tells whether {@code c} lies in one of the Unicode blocks this class treats
     * as Chinese: CJK ideographs (unified, compatibility, extension A), CJK
     * symbols and punctuation, general punctuation, and half/full-width forms.
     *
     * @param c the character to classify
     * @return {@code true} if the character's block is one of the blocks above
     */
    private static boolean isChinese(char c) {
        Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
        return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Heuristically decides whether a string is garbled.
     *
     * <p>Whitespace and punctuation are removed first. Of the remaining
     * characters, only those that are not letters/digits, or that fall in
     * U+4E00..U+9FA5, are considered. The text is reported as garbled when more
     * than 40% of the considered characters are not Chinese according to
     * {@link #isChinese(char)}.
     *
     * @param strName the text to inspect; may be {@code null}
     * @return {@code true} if the text looks garbled; {@code false} otherwise,
     *         including for {@code null} and for text with no considered characters
     */
    public static boolean isMessyCode(String strName) {
        if (strName == null) {
            return false;
        }
        String withoutWhitespace = WHITESPACE.matcher(strName).replaceAll("");
        String stripped = PUNCTUATION.matcher(withoutWhitespace).replaceAll("").trim();

        int considered = 0;
        int nonChinese = 0;
        for (int i = 0; i < stripped.length(); i++) {
            char c = stripped.charAt(i);
            boolean inBasicCjkRange = c >= '\u4e00' && c <= '\u9fa5';
            if (!Character.isLetterOrDigit(c) || inBasicCjkRange) {
                if (!isChinese(c)) {
                    nonChinese++;
                }
                considered++;
            }
        }
        if (considered == 0) {
            // Nothing to judge by (e.g. plain ASCII letters and digits only).
            return false;
        }
        // Single-precision ratio compared against a double threshold: kept
        // deliberately so that borderline ratios are classified consistently.
        float ratio = (float) nonChinese / considered;
        return ratio > MESSY_RATIO_THRESHOLD;
    }

    /**
     * Repairs a string if {@link #isMessyCode(String)} reports it as garbled, by
     * encoding it as GBK and decoding those bytes as UTF-8.
     *
     * @param tempMsg the text to repair; may be {@code null}
     * @return the re-decoded text when the input looks garbled, otherwise the
     *         input unchanged (also when a required charset is unavailable)
     */
    public static String tranfrom(String tempMsg) {
        if (!isMessyCode(tempMsg)) {
            return tempMsg;
        }
        try {
            return new String(tempMsg.getBytes("GBK"), "UTF-8");
        } catch (java.io.UnsupportedEncodingException e) {
            // Best effort: GBK is not guaranteed on every Java runtime, so fall
            // back to the unmodified input rather than failing the caller.
            return tempMsg;
        }
    }

    /**
     * Repairs a possibly garbled string, but keeps the original whenever the
     * repaired text would be longer than the input.
     *
     * @param tempMsg the text to repair; may be {@code null}
     * @return the repaired text, or the original text if the repair made it
     *         longer; {@code null} if the input is {@code null}
     */
    public static String toChinese(String tempMsg) {
        if (tempMsg == null) {
            return null;
        }
        String converted = tranfrom(tempMsg);
        return tempMsg.length() < converted.length() ? tempMsg : converted;
    }

    /**
     * Ad-hoc manual check: prints the length of a sample JSON fragment, then the
     * fragment and its length after applying a regex replacement.
     *
     * @param args ignored
     */
    public static void main(String[] args) {
        String str = "03\",\"begintime\":\"14:00:00 \",\"qjenddate\":\"2016-11-05\",\"endtime\":\"18:00:00 \",\"qjnumber\"";
        System.out.println(str.length());
        String s = str.replaceAll("\"\\s+$\\b", "");
        System.out.println(s);
        System.out.println(s.length());
    }
}