fs-danaus
2024-08-09 7204e3dff0490732e861ccd1338e3e3c31d768c6
提交 | 用户 | age
a6a76f 1 package com.yc.utils;
F 2
3 import java.util.regex.Matcher;  
4 import java.util.regex.Pattern;
5
6 import org.apache.poi.util.StringUtil;  
/**
 * Detects whether a string looks like garbled (mis-decoded) Chinese text and,
 * if so, attempts to convert it back.
 *
 * <p>The detection is a heuristic based on the share of "suspicious"
 * characters; see {@link #isMessyCode(String)} for the exact rule.
 */
public class ChineseUtill {

    /** Matches runs of whitespace; used to strip all whitespace before analysis. */
    private static final Pattern WHITESPACE = Pattern.compile("\\s+");

    /** Matches a single Unicode punctuation character. */
    private static final Pattern PUNCTUATION = Pattern.compile("\\p{P}");

    /**
     * Tells whether a character belongs to one of the Unicode blocks this class
     * treats as "Chinese": the CJK ideograph blocks plus general, CJK and
     * half/full-width punctuation blocks.
     *
     * @param c the character to classify
     * @return {@code true} if {@code c} is in one of the accepted blocks
     */
    private static boolean isChinese(char c) {
        Character.UnicodeBlock block = Character.UnicodeBlock.of(c);
        return block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_COMPATIBILITY_IDEOGRAPHS
                || block == Character.UnicodeBlock.CJK_UNIFIED_IDEOGRAPHS_EXTENSION_A
                || block == Character.UnicodeBlock.GENERAL_PUNCTUATION
                || block == Character.UnicodeBlock.CJK_SYMBOLS_AND_PUNCTUATION
                || block == Character.UnicodeBlock.HALFWIDTH_AND_FULLWIDTH_FORMS;
    }

    /**
     * Heuristically decides whether a string is garbled.
     *
     * <p>Whitespace and punctuation are removed first. Of the remaining
     * characters, only those that are not letters/digits, or that fall in the
     * range U+4E00..U+9FA5, are considered. The string is reported as garbled
     * when strictly more than 40% of the considered characters are not
     * "Chinese" according to {@link #isChinese(char)}.
     *
     * @param strName the text to inspect; may be {@code null}
     * @return {@code true} if the text looks garbled; {@code false} otherwise,
     *         including for {@code null} input and for input with no
     *         considered characters
     */
    public static boolean isMessyCode(String strName) {
        if (strName == null) {
            return false;
        }

        String withoutWhitespace = WHITESPACE.matcher(strName).replaceAll("");
        String cleaned = PUNCTUATION.matcher(withoutWhitespace).replaceAll("").trim();

        int considered = 0;
        int suspicious = 0;
        for (int i = 0; i < cleaned.length(); i++) {
            char c = cleaned.charAt(i);
            boolean inBasicHanRange = c >= '\u4e00' && c <= '\u9fa5';
            if (!Character.isLetterOrDigit(c) || inBasicHanRange) {
                considered++;
                if (!isChinese(c)) {
                    suspicious++;
                }
            }
        }

        // Nothing to judge: avoid a 0/0 ratio and report "not garbled".
        if (considered == 0) {
            return false;
        }

        // Exact integer form of (suspicious / considered) > 0.4. Computing the
        // ratio in float and comparing against a double literal made an exact
        // 40% ratio compare as greater, which contradicts the strict threshold.
        return suspicious * 10L > considered * 4L;
    }

    /**
     * Re-decodes a string when {@link #isMessyCode(String)} reports it as
     * garbled: the text is encoded as GBK and the bytes are decoded as UTF-8.
     *
     * @param tempMsg the text to convert; may be {@code null}
     * @return the re-decoded text, or {@code tempMsg} unchanged when it is
     *         {@code null}, not reported as garbled, or GBK/UTF-8 is unavailable
     */
    public static String tranfrom(String tempMsg) {
        if (tempMsg == null || !isMessyCode(tempMsg)) {
            return tempMsg;
        }
        try {
            return new String(tempMsg.getBytes("GBK"), "UTF-8");
        } catch (java.io.UnsupportedEncodingException ignored) {
            // Best effort: if the runtime lacks one of the charsets, keep the input as-is.
            return tempMsg;
        }
    }

    /**
     * Converts the text with {@link #tranfrom(String)} but keeps the original
     * whenever the converted result is longer than the input.
     *
     * @param tempMsg the text to convert; may be {@code null}
     * @return the converted text, or the original when conversion lengthened it;
     *         {@code null} for {@code null} input
     */
    public static String toChinese(String tempMsg) {
        if (tempMsg == null) {
            return null;
        }
        String converted = tranfrom(tempMsg);
        return tempMsg.length() < converted.length() ? tempMsg : converted;
    }

    /**
     * Ad-hoc manual check: prints the length of a sample string, the result of
     * a regex replacement on it, and the length of that result.
     *
     * @param args unused
     */
    public static void main(String[] args) {
        String str = "03\",\"begintime\":\"14:00:00            \",\"qjenddate\":\"2016-11-05\",\"endtime\":\"18:00:00            \",\"qjnumber\"";
        System.out.println(str.length());
        String s = str.replaceAll("\"\\s+$\\b", "");
        System.out.println(s);
        System.out.println(s.length());
    }
}