2 files modified
34 files added
| | |
| | | user.setAvatar(FtpConfig.ip + user.getAvatar().substring(26)); |
| | | } |
| | | |
| | | String s = "insert into blade_user(id,tenant_id,account,password,name,real_name,avatar,email,phone,sex,role_id,dept_id,cardid,nativePlace,nation,fingerprint,education," + |
| | | "politicaloutlook,healstats,height,address,registered,rtime,securitynumber,hold,jurisdiction,examination_type,status,is_deleted,dispatch) " + |
| | | String s = "insert into blade_user(id,tenant_id,account,password,name,real_name,avatar,email,phone,sex,role_id,dept_id,cardid," + |
| | | "jurisdiction,examination_type,status,is_deleted) " + |
| | | "values(" + "'" + user.getId() + "'" + "," + "'" + user.getTenantId() + "'" + "," + "'" + user.getAccount() + "'" + "," + |
| | | "'" + user.getPassword() + "'" + "," + "'" + user.getName() + "'" + "," + "'" + user.getRealName() + "'" + "," + "'" + user.getAvatar() + "'" + "," + |
| | | "'" + user.getEmail() + "'" + "," + "'" + user.getPhone() + "'" + "," + "'" + user.getSex() + "'" + "," + "'" + user.getRoleId() + "'" + |
| | | "," + "'" + user.getDeptId() + "'" + |
| | | "," + "'" + user.getCardid() + "'" + |
| | | "," + "'" + user.getJurisdiction() + "'" + |
| | | "," + "'" + user.getExamination_type() + "'" + |
| | | "," + "'" + user.getStatus() + "'" + |
| | | "," + "'" + user.getIsDeleted() + "'" + ")"; |
| | | System.out.println(s); |
| | | FtpUtil.sqlFileUpload(s); |
| | | return R.status(status); |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springframework.core.io.ClassPathResource; |
| | | import org.springframework.util.StopWatch; |
| | | |
| | | import java.io.*; |
| | | import java.nio.charset.StandardCharsets; |
| | | import java.nio.file.Files; |
| | | import java.nio.file.Paths; |
| | | import java.util.ArrayList; |
| | | import java.util.Arrays; |
| | | import java.util.List; |
| | | import java.util.function.Function; |
| | | import java.util.stream.Stream; |
| | | |
| | | public class DemoApplication { |
| | | |
| | | public static void main(String[] args) throws Exception { |
| | | |
| | | test_StringSearch(); |
| | | test_WordsSearch(); |
| | | |
| | | // test_StringSearchEx(); |
| | | // test_WordsSearchEx(); |
| | | // |
| | | // test_StringSearchEx2(); |
| | | // test_WordsSearchEx2(); |
| | | // test_IllegalWordsSearch(); |
| | | // |
| | | // test_StringMatch(); |
| | | // test_WordsMatch(); |
| | | // |
| | | // test_StringMatchEx(); |
| | | // test_WordsMatchEx(); |
| | | // |
| | | // test_PinyinMatch(); |
| | | // test_PinyinMatch2(); |
| | | // |
| | | // test_Pinyin(); |
| | | // test_words(); |
| | | |
| | | // try { |
| | | // test_save_load(); |
| | | // test_IllegalWordsSearch_loadWordsFormBinaryFile(); |
| | | // } catch (Exception e) { |
| | | // e.printStackTrace(); |
| | | // } |
| | | // test_times(); |
| | | |
| | | // test_issues_54(); |
| | | // test_issues_57(); |
| | | // test_issues_57_2(); |
| | | // test_issues_57_3(); |
| | | // test_issues_65(); |
| | | // test_issues_74(); |
| | | } |
| | | |
| | | private static void test_StringSearch() { |
| | | String test = "我是中国人"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("中国"); |
| | | list.add("国人"); |
| | | list.add("zg人"); |
| | | System.out.println("StringSearch run Test."); |
| | | |
| | | StringSearch iwords = new StringSearch(); |
| | | iwords.SetKeywords(list); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | String f = iwords.FindFirst(test); |
| | | if (f != "中国") { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | |
| | | List<String> all = iwords.FindAll(test); |
| | | if (all.get(0) != "中国") { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(1) != "国人") { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.size() != 2) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | |
| | | String str = iwords.Replace(test, '*'); |
| | | if (str.equals("我是***") == false) { |
| | | System.out.println("Replace is Error."); |
| | | } |
| | | } |
| | | |
| | | private static void test_StringSearchEx() { |
| | | String test = "我是中国人"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("中国"); |
| | | list.add("国人"); |
| | | list.add("zg人"); |
| | | System.out.println("StringSearchEx run Test."); |
| | | |
| | | StringSearchEx iwords = new StringSearchEx(); |
| | | iwords.SetKeywords(list); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | String f = iwords.FindFirst(test); |
| | | if (f != "中国") { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | |
| | | List<String> all = iwords.FindAll(test); |
| | | if (all.get(0) != "中国") { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(1) != "国人") { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.size() != 2) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | |
| | | String str = iwords.Replace(test, '*'); |
| | | if (str.equals("我是***") == false) { |
| | | System.out.println("Replace is Error."); |
| | | } |
| | | } |
| | | |
| | | private static void test_StringSearchEx2() { |
| | | String test = "我是中国人"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("中国"); |
| | | list.add("国人"); |
| | | list.add("zg人"); |
| | | System.out.println("StringSearchEx2 run Test."); |
| | | |
| | | StringSearchEx2 iwords = new StringSearchEx2(); |
| | | iwords.SetKeywords(list); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | String f = iwords.FindFirst(test); |
| | | if (f != "中国") { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | |
| | | List<String> all = iwords.FindAll(test); |
| | | if (all.get(0) != "中国") { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(1) != "国人") { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.size() != 2) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | |
| | | String str = iwords.Replace(test, '*'); |
| | | if (str.equals("我是***") == false) { |
| | | System.out.println("Replace is Error."); |
| | | } |
| | | } |
| | | |
| | | private static void test_WordsSearch() { |
| | | String test = "我是中国人"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("中国"); |
| | | list.add("国人"); |
| | | list.add("zg人"); |
| | | System.out.println("WordsSearch run Test."); |
| | | |
| | | WordsSearch iwords = new WordsSearch(); |
| | | iwords.SetKeywords(list); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | WordsSearchResult f = iwords.FindFirst(test); |
| | | if (f.Keyword != "中国") { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | |
| | | List<WordsSearchResult> all = iwords.FindAll(test); |
| | | if (all.get(0).Keyword != "中国") { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(1).Keyword != "国人") { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.size() != 2) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | |
| | | String str = iwords.Replace(test, '*'); |
| | | if (str.equals("我是***") == false) { |
| | | System.out.println("Replace is Error."); |
| | | } |
| | | } |
| | | |
| | | private static void test_WordsSearchEx() throws IOException { |
| | | String test = "我是中国人"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("中国"); |
| | | list.add("国人"); |
| | | list.add("zg人"); |
| | | System.out.println("WordsSearchEx run Test."); |
| | | |
| | | WordsSearchEx iwords2 = new WordsSearchEx(); |
| | | iwords2.SetKeywords(list); |
| | | iwords2.Save("WordsSearchEx.dat"); |
| | | |
| | | WordsSearchEx iwords = new WordsSearchEx(); |
| | | iwords.Load("WordsSearchEx.dat"); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | WordsSearchResult f = iwords.FindFirst(test); |
| | | if (f.Keyword.equals("中国") == false) { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | |
| | | List<WordsSearchResult> all = iwords.FindAll(test); |
| | | if (all.get(0).Keyword.equals("中国") == false) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(1).Keyword.equals("国人") == false) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.size() != 2) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | |
| | | String str = iwords.Replace(test, '*'); |
| | | if (str.equals("我是***") == false) { |
| | | System.out.println("Replace is Error."); |
| | | } |
| | | } |
| | | |
| | | private static void test_WordsSearchEx2() { |
| | | String test = "我是中国人"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("中国"); |
| | | list.add("国人"); |
| | | list.add("zg人"); |
| | | System.out.println("WordsSearchEx2 run Test."); |
| | | |
| | | WordsSearchEx2 iwords = new WordsSearchEx2(); |
| | | iwords.SetKeywords(list); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | WordsSearchResult f = iwords.FindFirst(test); |
| | | if (f.Keyword != "中国") { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | |
| | | List<WordsSearchResult> all = iwords.FindAll(test); |
| | | if (all.get(0).Keyword != "中国") { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(1).Keyword != "国人") { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.size() != 2) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | |
| | | String str = iwords.Replace(test, '*'); |
| | | if (str.equals("我是***") == false) { |
| | | System.out.println("Replace is Error."); |
| | | } |
| | | } |
| | | |
| | | private static void test_IllegalWordsSearch() { |
| | | String test = "我是中国人"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("中国"); |
| | | list.add("国人"); |
| | | list.add("zg人"); |
| | | System.out.println("IllegalWordsSearch run Test."); |
| | | |
| | | IllegalWordsSearch iwords = new IllegalWordsSearch(); |
| | | iwords.SetKeywords(list); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | IllegalWordsSearchResult f = iwords.FindFirst(test); |
| | | if (f.Keyword.equals("中国") == false) { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | |
| | | List<IllegalWordsSearchResult> all = iwords.FindAll(test); |
| | | if (all.get(0).Keyword.equals("中国") == false) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(1).Keyword.equals("国人") == false) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.size() != 2) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | |
| | | String str = iwords.Replace(test, '*'); |
| | | if (str.equals("我是***") == false) { |
| | | System.out.println("Replace is Error."); |
| | | } |
| | | } |
| | | |
| | | private static void test_StringMatch() throws Exception { |
| | | String test = "我是中国人"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("[中美]国"); |
| | | list.add("国人"); |
| | | list.add("zg人"); |
| | | System.out.println("StringMatch run Test."); |
| | | |
| | | StringMatch iwords = new StringMatch(); |
| | | iwords.SetKeywords(list); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | String f = iwords.FindFirst(test); |
| | | if (!f.equals("中国")) { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | |
| | | List<String> all = iwords.FindAll(test); |
| | | if (!all.get(0).equals("中国")) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (!all.get(1).equals("国人")) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.size() != 2) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | |
| | | String str = iwords.Replace(test, '*'); |
| | | if (str.equals("我是***") == false) { |
| | | System.out.println("Replace is Error."); |
| | | } |
| | | } |
| | | |
| | | private static void test_StringMatchEx() throws Exception { |
| | | String test = "我是中国人"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("[中美]国"); |
| | | list.add("国人"); |
| | | list.add("zg人"); |
| | | System.out.println("StringMatchEx run Test."); |
| | | |
| | | StringMatchEx iwords = new StringMatchEx(); |
| | | iwords.SetKeywords(list); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | String f = iwords.FindFirst(test); |
| | | if (!f.equals("中国")) { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | |
| | | List<String> all = iwords.FindAll(test); |
| | | if (!all.get(0).equals("中国")) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (!all.get(1).equals("国人")) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.size() != 2) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | |
| | | String str = iwords.Replace(test, '*'); |
| | | if (str.equals("我是***") == false) { |
| | | System.out.println("Replace is Error."); |
| | | } |
| | | } |
| | | |
| | | private static void test_WordsMatch() throws Exception { |
| | | String test = "我是中国人"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("[中美]国"); |
| | | list.add("国人"); |
| | | list.add("zg人"); |
| | | System.out.println("WordsMatch run Test."); |
| | | |
| | | WordsMatch iwords = new WordsMatch(); |
| | | iwords.SetKeywords(list); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | WordsSearchResult f = iwords.FindFirst(test); |
| | | if (f.Keyword.equals("中国") == false) { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | |
| | | List<WordsSearchResult> all = iwords.FindAll(test); |
| | | if (all.get(0).Keyword.equals("中国") == false) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(1).Keyword.equals("国人") == false) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.size() != 2) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | |
| | | String str = iwords.Replace(test, '*'); |
| | | if (str.equals("我是***") == false) { |
| | | System.out.println("Replace is Error."); |
| | | } |
| | | } |
| | | |
| | | private static void test_WordsMatchEx() throws Exception { |
| | | String test = "我是中国人"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("[中美]国"); |
| | | list.add("国人"); |
| | | list.add("zg人"); |
| | | System.out.println("WordsMatchEx run Test."); |
| | | |
| | | WordsMatchEx iwords = new WordsMatchEx(); |
| | | iwords.SetKeywords(list); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | WordsSearchResult f = iwords.FindFirst(test); |
| | | if (f.Keyword.equals("中国") == false) { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | |
| | | List<WordsSearchResult> all = iwords.FindAll(test); |
| | | if (all.get(0).Keyword.equals("中国") == false) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(1).Keyword.equals("国人") == false) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.size() != 2) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | |
| | | String str = iwords.Replace(test, '*'); |
| | | if (str.equals("我是***") == false) { |
| | | System.out.println("Replace is Error."); |
| | | } |
| | | } |
| | | |
| | | private static void test_PinyinMatch() throws NumberFormatException, IOException { |
| | | String s = "北京|天津|河北|辽宁|吉林|黑龙江|山东|江苏|上海|浙江|安徽|福建|江西|广东|广西|海南|河南|湖南|湖北|山西|内蒙古|宁夏|青海|陕西|甘肃|新疆|四川|贵州|云南|重庆|西藏|香港|澳门|台湾"; |
| | | List<String> list = new ArrayList<String>(); |
| | | String[] ss = s.split("\\|"); |
| | | for (String st : ss) { |
| | | list.add(st); |
| | | } |
| | | PinyinMatch match = new PinyinMatch(); |
| | | match.SetKeywords(list); |
| | | System.out.println("PinyinMatch run Test."); |
| | | |
| | | List<String> all = match.Find("BJ"); |
| | | if (all.get(0).equals("北京") == false) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | if (all.size() != 1) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | |
| | | all = match.Find("北J"); |
| | | if (all.get(0).equals("北京") == false) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | if (all.size() != 1) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | |
| | | all = match.Find("北Ji"); |
| | | if (all.get(0).equals("北京") == false) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | if (all.size() != 1) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | all = match.Find("Su"); |
| | | if (all.get(0).equals("江苏") == false) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | |
| | | all = match.Find("Sdon"); |
| | | if (all.get(0).equals("山东") == false) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | if (all.size() != 1) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | all = match.Find("S东"); |
| | | if (all.get(0).equals("山东") == false) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | if (all.size() != 1) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | |
| | | List<Integer> all2 = match.FindIndex("BJ"); |
| | | if (all2.get(0) != 0) { |
| | | System.out.println("FindIndex is Error."); |
| | | } |
| | | if (all2.size() != 1) { |
| | | System.out.println("FindIndex is Error."); |
| | | } |
| | | |
| | | all = match.FindWithSpace("S 东"); |
| | | if (all.get(0).equals("山东") == false) { |
| | | System.out.println("FindWithSpace is Error."); |
| | | } |
| | | if (all.size() != 1) { |
| | | System.out.println("FindWithSpace is Error."); |
| | | } |
| | | |
| | | all = match.FindWithSpace("h 江"); |
| | | if (all.get(0).equals("黑龙江") == false) { |
| | | System.out.println("FindWithSpace is Error."); |
| | | } |
| | | |
| | | all2 = match.FindIndexWithSpace("B J"); |
| | | if (all2.get(0) != 0) { |
| | | System.out.println("FindIndexWithSpace is Error."); |
| | | } |
| | | if (all2.size() != 1) { |
| | | System.out.println("FindIndexWithSpace is Error."); |
| | | } |
| | | |
| | | all = match.FindWithSpace("京 北"); |
| | | if (all.size() != 0) { |
| | | System.out.println("FindWithSpace is Error."); |
| | | } |
| | | |
| | | all = match.FindWithSpace("黑龙 龙江"); |
| | | if (all.size() != 0) { |
| | | System.out.println("FindWithSpace is Error."); |
| | | } |
| | | |
| | | all = match.FindWithSpace("黑龙 江"); |
| | | if (all.get(0).equals("黑龙江") == false) { |
| | | System.out.println("FindWithSpace is Error."); |
| | | } |
| | | all = match.FindWithSpace("黑 龙 江"); |
| | | if (all.get(0).equals("黑龙江") == false) { |
| | | System.out.println("FindWithSpace is Error."); |
| | | } |
| | | } |
| | | |
| | | private static void test_PinyinMatch2() throws Exception { |
| | | String s = "北京|天津|河北|辽宁|吉林|黑龙江|山东|江苏|上海|浙江|安徽|福建|江西|广东|广西|海南|河南|湖南|湖北|山西|内蒙古|宁夏|青海|陕西|甘肃|新疆|四川|贵州|云南|重庆|西藏|香港|澳门|台湾"; |
| | | List<String> list = new ArrayList<String>(); |
| | | String[] ss = s.split("\\|"); |
| | | for (String st : ss) { |
| | | list.add(st); |
| | | } |
| | | PinyinMatch2<String> match = new PinyinMatch2<String>(list); |
| | | match.SetKeywordsFunc(new Function<String, String>() { |
| | | @Override |
| | | public String apply(String t) { |
| | | return t; |
| | | } |
| | | }); |
| | | |
| | | System.out.println("PinyinMatch2 run Test."); |
| | | |
| | | List<String> all = match.Find("BJ"); |
| | | if (all.get(0).equals("北京") == false) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | if (all.size() != 1) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | |
| | | all = match.Find("北J"); |
| | | if (all.get(0).equals("北京") == false) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | if (all.size() != 1) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | |
| | | all = match.Find("北Ji"); |
| | | if (all.get(0).equals("北京") == false) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | if (all.size() != 1) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | all = match.Find("Su"); |
| | | if (all.get(0).equals("江苏") == false) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | |
| | | all = match.Find("Sdon"); |
| | | if (all.get(0).equals("山东") == false) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | if (all.size() != 1) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | all = match.Find("S东"); |
| | | if (all.get(0).equals("山东") == false) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | if (all.size() != 1) { |
| | | System.out.println("Find is Error."); |
| | | } |
| | | |
| | | all = match.FindWithSpace("S 东"); |
| | | if (all.get(0).equals("山东") == false) { |
| | | System.out.println("FindWithSpace is Error."); |
| | | } |
| | | if (all.size() != 1) { |
| | | System.out.println("FindWithSpace is Error."); |
| | | } |
| | | |
| | | all = match.FindWithSpace("h 江"); |
| | | if (all.get(0).equals("黑龙江") == false) { |
| | | System.out.println("FindWithSpace is Error."); |
| | | } |
| | | |
| | | all = match.FindWithSpace("京 北"); |
| | | if (all.size() != 0) { |
| | | System.out.println("FindWithSpace is Error."); |
| | | } |
| | | |
| | | all = match.FindWithSpace("黑龙 龙江"); |
| | | if (all.size() != 0) { |
| | | System.out.println("FindWithSpace is Error."); |
| | | } |
| | | |
| | | all = match.FindWithSpace("黑龙 江"); |
| | | if (all.get(0).equals("黑龙江") == false) { |
| | | System.out.println("FindWithSpace is Error."); |
| | | } |
| | | all = match.FindWithSpace("黑 龙 江"); |
| | | if (all.get(0).equals("黑龙江") == false) { |
| | | System.out.println("FindWithSpace is Error."); |
| | | } |
| | | } |
| | | |
| | | private static void test_save_load() throws IOException { |
| | | String test = "我是中国人"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("中国"); |
| | | list.add("国人"); |
| | | list.add("zg人"); |
| | | System.out.println("test_save_load run Test."); |
| | | |
| | | StringSearchEx2 search = new StringSearchEx2(); |
| | | search.SetKeywords(list); |
| | | search.Save("1.dat"); |
| | | |
| | | StringSearchEx2 iwords = new StringSearchEx2(); |
| | | iwords.Load("1.dat"); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | String f = iwords.FindFirst(test); |
| | | if (f != "中国") { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | |
| | | List<String> all = iwords.FindAll(test); |
| | | if (all.get(0) != "中国") { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(1) != "国人") { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.size() != 2) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | |
| | | String str = iwords.Replace(test, '*'); |
| | | if (str.equals("我是***") == false) { |
| | | System.out.println("Replace is Error."); |
| | | } |
| | | } |
| | | |
| | | private static void test_times() { |
| | | String ts = readLineByLineJava8("BadWord.txt"); |
| | | String[] sp = ts.split("[\r\n]"); |
| | | List<String> list = new ArrayList<String>(); |
| | | for (String item : sp) { |
| | | list.add(item); |
| | | } |
| | | String words = readLineByLineJava8("Talk.txt"); |
| | | |
| | | StringSearchEx2 iwords = new StringSearchEx2(); |
| | | iwords.SetKeywords(list); |
| | | |
| | | StopWatch sw = new StopWatch(); |
| | | sw.start("校验耗时"); |
| | | for (int i = 0; i < 100000; i++) { |
| | | // iwords.ContainsAny(words); |
| | | iwords.FindAll(words); |
| | | // System.out.println(list2.size()); |
| | | } |
| | | sw.stop(); |
| | | System.out.println(sw.getTotalTimeMillis() + "ms"); |
| | | |
| | | } |
| | | |
| | | private static String readLineByLineJava8(String filePath) { |
| | | StringBuilder contentBuilder = new StringBuilder(); |
| | | try (Stream<String> stream = Files.lines(Paths.get(filePath), StandardCharsets.UTF_8)) { |
| | | stream.forEach(s -> contentBuilder.append(s).append("\n")); |
| | | } catch (IOException e) { |
| | | e.printStackTrace(); |
| | | } |
| | | return contentBuilder.toString(); |
| | | } |
| | | |
| | | private static void test_IllegalWordsSearch_loadWordsFormBinaryFile() throws IOException { |
| | | |
| | | long l1 = System.currentTimeMillis(); |
| | | |
| | | IllegalWordsSearch search = new IllegalWordsSearch(); |
| | | long l2 = System.currentTimeMillis(); |
| | | System.out.println("IllegalWordsSearch init time:" + (l2 - l1)); |
| | | |
| | | search.Load(new ClassPathResource("IllegalWordsSearch.dat").getFile().getAbsolutePath()); |
| | | long l3 = System.currentTimeMillis(); |
| | | System.out.println("load Load time:" + (l3 - l2)); |
| | | |
| | | String test = "卖毒品哈哈哈哈毛澤東porn哈哈哈哈胡锦涛pornasds哈哈哈哈胡锦涛porn哈哈哈哈胡锦涛porn哈哈哈哈胡锦涛胡锦涛撒旦撒旦pornporn哈哈哈哈胡锦涛porn哈哈哈哈胡锦涛porn" |
| | | + "哈哈哈哈胡锦涛porn哈哈哈哈胡锦涛porn哈哈哈哈胡錦濤porn哈哈哈哈胡锦涛porn哈哈哈哈胡锦涛porn哈哈哈哈胡锦涛porn哈哈哈哈胡锦涛porn哈哈哈哈胡锦涛porn" |
| | | + "哈哈哈哈胡锦涛porn哈哈哈哈胡锦涛porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn" |
| | | + "哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn" |
| | | + "哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn" |
| | | + "哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn" |
| | | + "哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn哈哈哈哈或porn"; |
| | | |
| | | boolean b = search.ContainsAny(test); |
| | | if (!b) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | long l4 = System.currentTimeMillis(); |
| | | System.out.println("ContainsAny time:" + (l4 - l3)); |
| | | |
| | | String str = search.Replace(test, '*'); |
| | | long l5 = System.currentTimeMillis(); |
| | | System.out.println("Replace Result:" + str); |
| | | System.out.println("Replace time:" + (l5 - l4)); |
| | | } |
| | | |
| | | private static void test_IllegalWordsSearch_saveToBinaryFile() throws IOException { |
| | | List<String> list = new ArrayList<>(); |
| | | try (BufferedReader bufferedReader = new BufferedReader( |
| | | new InputStreamReader(new ClassPathResource("sensi_words.txt").getInputStream()))) { |
| | | for (String line = bufferedReader.readLine(); line != null; line = bufferedReader.readLine()) { |
| | | list.add(line); |
| | | } |
| | | } |
| | | IllegalWordsSearch search = new IllegalWordsSearch(); |
| | | search.SetKeywords(list); |
| | | search.Save("IllegalWordsSearch.dat"); |
| | | } |
| | | |
| | | private static void test_Pinyin() throws NumberFormatException, IOException { |
| | | System.out.println("text_Pinyin run Test."); |
| | | List<String> t = WordsHelper.GetAllPinyin('芃'); |
| | | if (t.get(0).equals("Peng") == false) { |
| | | System.out.println("GetAllPinyin is Error."); |
| | | } |
| | | |
| | | String a = WordsHelper.GetPinyinFast("阿"); |
| | | if (a.equals("A") == false) { |
| | | System.out.println("GetPinyinFast is Error."); |
| | | } |
| | | |
| | | String b = WordsHelper.GetPinyin("摩擦棒"); |
| | | if (b.equals("MoCaBang") == false) { |
| | | System.out.println("GetPinyin is Error."); |
| | | } |
| | | b = WordsHelper.GetPinyin("秘鲁"); |
| | | if (b.equals("BiLu") == false) { |
| | | System.out.println("GetPinyin is Error."); |
| | | } |
| | | |
| | | String py = WordsHelper.GetPinyinFast("我爱中国"); |
| | | if (py.equals("WoAiZhongGuo") == false) { |
| | | System.out.println("GetPinyinFast is Error."); |
| | | } |
| | | |
| | | py = WordsHelper.GetPinyin("快乐,乐清"); |
| | | if (py.equals("KuaiLe,YueQing") == false) { |
| | | System.out.println("GetPinyin is Error."); |
| | | } |
| | | |
| | | py = WordsHelper.GetPinyin("快乐清理"); |
| | | if (py.equals("KuaiLeQingLi") == false) { |
| | | System.out.println("GetPinyin is Error."); |
| | | } |
| | | |
| | | py = WordsHelper.GetPinyin("我爱中国", true); |
| | | if (py.equals("WǒÀiZhōngGuó") == false) { |
| | | System.out.println("GetPinyin is Error."); |
| | | } |
| | | |
| | | py = WordsHelper.GetFirstPinyin("我爱中国"); |
| | | if (py.equals("WAZG") == false) { |
| | | System.out.println("GetPinyin is Error."); |
| | | } |
| | | |
| | | List<String> pys = WordsHelper.GetAllPinyin('传'); |
| | | if (pys.get(0).equals("Chuan") == false) { |
| | | System.out.println("GetAllPinyin is Error."); |
| | | } |
| | | if (pys.get(1).equals("Zhuan") == false) { |
| | | System.out.println("GetAllPinyin is Error."); |
| | | } |
| | | |
| | | py = WordsHelper.GetPinyinForName("单一一"); |
| | | if (py.equals("ShanYiYi") == false) { |
| | | System.out.println("GetPinyinForName is Error."); |
| | | } |
| | | |
| | | py = WordsHelper.GetPinyinForName("单一一", true); |
| | | if (py.equals("ShànYīYī") == false) { |
| | | System.out.println("GetPinyinForName is Error."); |
| | | } |
| | | |
| | | List<String> all = WordsHelper.GetAllPinyin('石'); |
| | | if (all.size() == 0) { |
| | | System.out.println("GetAllPinyin is Error."); |
| | | } |
| | | |
| | | } |
| | | |
| | | private static void test_words() throws Exception { |
| | | System.out.println("test_words run Test."); |
| | | String s = WordsHelper.ToSimplifiedChinese("壹佰贰拾叁億肆仟伍佰陆拾柒萬捌仟玖佰零壹元壹角贰分"); |
| | | if (s.equals("壹佰贰拾叁亿肆仟伍佰陆拾柒万捌仟玖佰零壹元壹角贰分") == false) { |
| | | System.out.println("ToSimplifiedChinese is Error."); |
| | | } |
| | | |
| | | String tw = WordsHelper.ToTraditionalChinese("壹佰贰拾叁亿肆仟伍佰陆拾柒万捌仟玖佰零壹元壹角贰分"); |
| | | if (tw.equals("壹佰貳拾叄億肆仟伍佰陸拾柒萬捌仟玖佰零壹元壹角貳分") == false) { |
| | | System.out.println("ToTraditionalChinese is Error."); |
| | | } |
| | | |
| | | String tw2 = WordsHelper.ToTraditionalChinese("原代码11", 2); |
| | | if (tw2.equals("原始碼11") == false) { |
| | | System.out.println("ToTraditionalChinese is Error."); |
| | | } |
| | | |
| | | String tw3 = WordsHelper.ToTraditionalChinese("反反复复", 2); |
| | | if (tw3.equals("反反覆覆") == false) { |
| | | System.out.println("ToTraditionalChinese is Error."); |
| | | } |
| | | |
| | | String tw4 = WordsHelper.ToTraditionalChinese("这人考虑事情总是反反复复的", 2); |
| | | if (tw4.equals("這人考慮事情總是反反覆覆的") == false) { |
| | | System.out.println("ToTraditionalChinese is Error."); |
| | | } |
| | | |
| | | } |
| | | |
| | | public static void test_issues_54() { |
| | | IllegalWordsSearch search = new IllegalWordsSearch(); |
| | | search.SetKeywords(Arrays.asList("test", "world", "this", "hello", "monster")); |
| | | String result = search.Replace("test, hahaha, this is a hello world", '*'); |
| | | if (result.equals("****, hahaha, **** is a ***** *****") == false) { |
| | | System.out.println("IllegalWordsSearch Replace is Error."); |
| | | } |
| | | } |
| | | public static void test_issues_57(){ |
| | | String test = "一,二二,三三三,四四四四,五五五五五,六六六六六六"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("一"); |
| | | list.add("二二"); |
| | | list.add("三三三"); |
| | | list.add("四四四四"); |
| | | list.add("五五五五五"); |
| | | list.add("六六六六六六"); |
| | | System.out.println("test_issues_57 run Test."); |
| | | |
| | | IllegalWordsSearch iwords = new IllegalWordsSearch(); |
| | | iwords.SetKeywords(list); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | IllegalWordsSearchResult f = iwords.FindFirst(test); |
| | | if (f.Keyword.equals("一") == false) { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | |
| | | List<IllegalWordsSearchResult> all = iwords.FindAll(test); |
| | | if (all.get(0).Keyword.equals("一") == false) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(1).Keyword.equals("二二") == false) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(2).Keyword.equals("三三三") == false) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(3).Keyword.equals("四四四四") == false) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(4).Keyword.equals("五五五五五") == false) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | if (all.get(5).Keyword.equals("六六六六六六") == false) { |
| | | System.out.println("FindAll is Error."); |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * Regression check for issue 57 (mixed Latin/Chinese keyword): the text equals the first |
| | | * keyword and must be reported by ContainsAny and FindFirst. |
| | | * A null FindFirst result is reported as an error instead of raising a NullPointerException. |
| | | */ |
| | | public static void test_issues_57_2(){ |
| | | String test = "jameson吃饭"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("jameson吃饭"); |
| | | list.add("吃饭jameson"); |
| | | System.out.println("test_issues_57_2 run Test."); |
| | | |
| | | IllegalWordsSearch iwords = new IllegalWordsSearch(); |
| | | iwords.SetKeywords(list); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | // FindFirst returns null when nothing matches, so guard before reading Keyword. |
| | | IllegalWordsSearchResult f = iwords.FindFirst(test); |
| | | if (f == null || f.Keyword.equals("jameson吃饭") == false) { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | } |
| | | /** |
| | | * Regression check for issue 57 (whole-word matching): in "his is sha ash" the first |
| | | * reported keyword must be "sha". Failures are printed to stdout. |
| | | */ |
| | | public static void test_issues_57_3(){ |
| | | final String sample = "his is sha ash"; |
| | | final List<String> words = new ArrayList<String>(Arrays.asList("ash", "sha", "bcd")); |
| | | System.out.println("test_issues_57_3 run Test."); |
| | | |
| | | final IllegalWordsSearch searcher = new IllegalWordsSearch(); |
| | | searcher.SetKeywords(words); |
| | | |
| | | if (!searcher.ContainsAny(sample)) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | final IllegalWordsSearchResult first = searcher.FindFirst(sample); |
| | | final boolean firstIsSha = first != null && first.Keyword.equals("sha"); |
| | | if (!firstIsSha) { |
| | | System.out.println("FindFirst is Error."); |
| | | } |
| | | } |
| | | public static void test_issues_65(){ |
| | | String test = "fFuck"; |
| | | List<String> list = new ArrayList<String>(); |
| | | list.add("fuck"); |
| | | list.add("ffx"); |
| | | list.add("bcd"); |
| | | System.out.println("test_issues_65 run Test."); |
| | | |
| | | IllegalWordsSearch iwords = new IllegalWordsSearch(); |
| | | iwords.SetKeywords(list); |
| | | |
| | | boolean b = iwords.ContainsAny(test); |
| | | if (b == false) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | |
| | | String f = iwords.Replace(test); |
| | | if (f == null || f.equals("*****") == false) { |
| | | System.out.println("Replace is Error."); |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * Regression check for issue 74: loads the keyword list from "sensi_words.txt" (resolved |
| | | * against the working directory) and expects the sample text to be flagged. |
| | | */ |
| | | public static void test_issues_74(){ |
| | | final List<String> words = loadKeywords(new File("sensi_words.txt")); |
| | | System.out.println("test_issues_74 run Test."); |
| | | |
| | | final IllegalWordsSearch searcher = new IllegalWordsSearch(); |
| | | searcher.SetKeywords(words); |
| | | |
| | | final String sample = "机机歪歪"; |
| | | if (!searcher.ContainsAny(sample)) { |
| | | System.out.println("ContainsAny is Error."); |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * Reads a keyword file into a list, one entry per line, in file order. |
| | | * The file is decoded with the platform default charset (FileReader). |
| | | * Best-effort: any failure is printed and the lines read so far (possibly none) are returned. |
| | | * |
| | | * @param file the keyword file to read |
| | | * @return the lines of the file; never null |
| | | */ |
| | | public static List<String> loadKeywords(File file){ |
| | | List<String> keyArray=new ArrayList<String>(); |
| | | // try-with-resources closes the reader even when readLine fails part-way through. |
| | | try (BufferedReader br = new BufferedReader(new FileReader(file))) { |
| | | String s; |
| | | while((s = br.readLine())!=null){ |
| | | keyArray.add(s); |
| | | } |
| | | }catch(Exception e){ |
| | | e.printStackTrace(); |
| | | } |
| | | return keyArray; |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springblade.modules.words.internals.BaseSearchEx; |
| | | |
| | | import java.io.FileOutputStream; |
| | | import java.io.IOException; |
| | | import java.io.InputStream; |
| | | import java.util.ArrayList; |
| | | import java.util.HashSet; |
| | | import java.util.List; |
| | | import java.util.Set; |
| | | import java.util.function.Function; |
| | | |
| | | /** |
| | | * 最新版本的IllegalWordsSearch, 与2020.05.24以前的版本不兼容, IllegalWordsSearch类太费精力了,头发稀疏了。 |
| | | * 我未来可能以敏感词过滤做为创业项目,所以这是最后的开源版本,不再免费补bug了。 |
| | | * IllegalWordsSearch修复了2020-10-8日前所有bug。 |
| | | */ |
| | | @Deprecated |
| | | public class IllegalWordsSearch extends BaseSearchEx { |
| | | /** |
| | | * Argument object passed to the SkipWordFilter callback: the character being examined, |
| | | * the text it belongs to, and its position in that text. |
| | | */ |
| | | public class SkipWordFilterHandler { |
| | | /** Character under inspection. */ |
| | | public char c; |
| | | /** Full text being scanned. */ |
| | | public String text; |
| | | /** Position of the character inside the text. */ |
| | | public int index; |
| | | |
| | | public SkipWordFilterHandler(final char c, final String text, final int index) { |
| | | this.text = text; |
| | | this.index = index; |
| | | this.c = c; |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * Argument object passed to the CharTranslate callback: the character to translate, |
| | | * the text (or keyword) it belongs to, and its position in that text. |
| | | */ |
| | | public class CharTranslateHandler { |
| | | /** Character to translate. */ |
| | | public char c; |
| | | /** Text or keyword that contains the character. */ |
| | | public String text; |
| | | /** Position of the character inside the text. */ |
| | | public int index; |
| | | |
| | | public CharTranslateHandler(final char c, final String text, final int index) { |
| | | this.text = text; |
| | | this.index = index; |
| | | this.c = c; |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * Argument object passed to the StringMatch callback, describing one candidate match |
| | | * so the callback can accept or reject it. |
| | | */ |
| | | public class StringMatchHandler { |
| | | /** Full text being scanned. */ |
| | | public String text; |
| | | /** Index of the first character of the candidate match in the text. */ |
| | | public int start; |
| | | /** Index of the last character of the candidate match in the text (inclusive). */ |
| | | public int end; |
| | | /** The matched slice of the text. */ |
| | | public String keyword; |
| | | /** Index of the matched keyword in the keyword array. */ |
| | | public int keywordIndex; |
| | | /** The configured keyword that produced the match. */ |
| | | public String matchKeyword; |
| | | /** Blacklist value associated with the matched keyword. */ |
| | | public int blacklistIndex; |
| | | |
| | | public StringMatchHandler(final String text, final int start, final int end, final String keyword, |
| | | final int keywordIndex, final String matchKeyword, final int blacklistIndex) { |
| | | // Location of the match. |
| | | this.text = text; |
| | | this.start = start; |
| | | this.end = end; |
| | | // What matched and which keyword it belongs to. |
| | | this.keyword = keyword; |
| | | this.matchKeyword = matchKeyword; |
| | | this.keywordIndex = keywordIndex; |
| | | this.blacklistIndex = blacklistIndex; |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * 使用跳词过滤器,默认使用 |
| | | */ |
| | | public boolean UseSkipWordFilter = true; |
| | | private final String _skipList = " \t\r\n~!@#$%^&*()_+-=【】、[]{}|;" |
| | | + "':\",。、《》?αβγδεζηθικλμνξοπρστυφχψωΑΒΓΔΕΖΗΘΙΚΛΜΝΞΟΠΡΣΤΥΦΧΨΩ。,、;:?!…—·ˉ¨‘’“”々~‖∶"'`|〃〔〕〈〉《》「」『』.〖〗【】()[]{}ⅠⅡⅢⅣⅤⅥⅦⅧⅨⅩⅪⅫ⒈⒉⒊⒋⒌⒍⒎⒏⒐⒑⒒⒓⒔⒕⒖⒗⒘⒙⒚⒛㈠㈡㈢㈣㈤㈥㈦㈧㈨㈩①②③④⑤⑥⑦⑧⑨⑩⑴⑵⑶⑷⑸⑹⑺⑻⑼⑽⑾⑿⒀⒁⒂⒃⒄⒅⒆⒇≈≡≠=≤≥<>≮≯∷±+-×÷/∫∮∝∞∧∨∑∏∪∩∈∵∴⊥∥∠⌒⊙≌∽√§№☆★○●◎◇◆□℃‰€■△▲※→←↑↓〓¤°#&@\︿_ ̄―♂♀┌┍┎┐┑┒┓─┄┈├┝┞┟┠┡┢┣│┆┊┬┭┮┯┰┱┲┳┼┽┾┿╀╁╂╃└┕┖┗┘┙┚┛━┅┉┤┥┦┧┨┩┪┫┃┇┋┴┵┶┷┸┹┺┻╋╊╉╈╇╆╅╄"; |
| | | private boolean[] _skipBitArray; |
| | | |
| | | /** 过滤跳词 */ |
| | | public Function<SkipWordFilterHandler, Boolean> SkipWordFilter; |
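| | | /** |
| | | * Character translation hook, e.g. for traditional/simplified conversion or case folding. When it is set, the UseIgnoreCase and UseDBCcaseConverter normalization is bypassed. |
| | | * To use it, assign this callback first and only then call SetKeywords, so the keywords are translated the same way as the scanned text. |
| | | */ |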
| | | public Function<CharTranslateHandler, Character> CharTranslate; |
| | | |
| | | /** |
| | | * 自定义字符串匹配 |
| | | */ |
| | | public Function<StringMatchHandler, Boolean> StringMatch; |
| | | |
| | | /** |
| | | * 使用重复词过滤器 |
| | | */ |
| | | public boolean UseDuplicateWordFilter = true; |
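| | | /** |
| | | * Blacklist type per keyword, parallel to the keyword array (set through SetBlacklist); empty until configured. |
| | | */ |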
| | | private int[] _blacklist = new int[0]; |
| | | /** |
| | | * 使用半角转化器 |
| | | */ |
| | | public boolean UseDBCcaseConverter = true; |
| | | /** |
| | | * 使用忽略大小写 |
| | | */ |
| | | public boolean UseIgnoreCase = true; |
| | | |
| | | /** |
| | | * Creates a searcher with the built-in skip-character table and no custom callbacks. |
| | | * Original author's note: this is the latest IllegalWordsSearch, incompatible with versions |
| | | * before 2020-05-24; it is the last open-source version and includes all bug fixes up to 2020-10-08. |
| | | */ |
| | | public IllegalWordsSearch() { |
| | | // One flag per possible char value; true marks a character that matching skips over. |
| | | _skipBitArray = new boolean[Character.MAX_VALUE + 1]; |
| | | for (final char skipChar : _skipList.toCharArray()) { |
| | | _skipBitArray[skipChar] = true; |
| | | } |
| | | StringMatch = null; |
| | | CharTranslate = null; |
| | | SkipWordFilter = null; |
| | | } |
| | | |
| | | /** |
| | | * Replaces the skip-character table. |
| | | * |
| | | * @param skipList characters that matching should skip over; null clears the table |
| | | */ |
| | | public void SetSkipWords(final String skipList) { |
| | | final boolean[] flags = new boolean[Character.MAX_VALUE + 1]; |
| | | if (skipList != null) { |
| | | for (final char skipChar : skipList.toCharArray()) { |
| | | flags[skipChar] = true; |
| | | } |
| | | } |
| | | _skipBitArray = flags; |
| | | } |
| | | |
| | | /** |
| | | * Sets the keywords to search for. |
| | | * Assign CharTranslate before calling this method if it is to be used; once CharTranslate is |
| | | * set, UseIgnoreCase has no effect. To match case-sensitively, set UseIgnoreCase to false |
| | | * before calling this method. |
| | | * Whenever normalization applies, the keywords are de-duplicated through a HashSet first, |
| | | * so their order is not preserved. |
| | | * |
| | | * @param keywords the keywords to register |
| | | */ |
| | | public void SetKeywords(final List<String> keywords) { |
| | | final boolean translate = CharTranslate != null; |
| | | if (!translate && !UseDBCcaseConverter && !UseIgnoreCase) { |
| | | // No normalization configured: hand the list over untouched. |
| | | super.SetKeywords(keywords); |
| | | return; |
| | | } |
| | | |
| | | final Set<String> unique = new HashSet<String>(keywords); |
| | | final List<String> normalized = new ArrayList<String>(); |
| | | for (final String word : unique) { |
| | | if (translate) { |
| | | // Run every character through the user-supplied translation callback. |
| | | final StringBuilder buffer = new StringBuilder(); |
| | | for (int pos = 0; pos < word.length(); pos++) { |
| | | final char mapped = CharTranslate.apply(new CharTranslateHandler(word.charAt(pos), word, pos)); |
| | | buffer.append(mapped); |
| | | } |
| | | normalized.add(buffer.toString()); |
| | | } else { |
| | | normalized.add(ToSenseWord(word)); |
| | | } |
| | | } |
| | | super.SetKeywords(normalized); |
| | | } |
| | | |
| | | protected void Save(final FileOutputStream bw) throws IOException { |
| | | super.Save(bw); |
| | | |
| | | bw.write(UseSkipWordFilter ? 1 : 0); |
| | | bw.write(NumHelper.serialize(_skipBitArray.length)); |
| | | for (final boolean item : _skipBitArray) { |
| | | bw.write(item ? 1 : 0); |
| | | } |
| | | |
| | | bw.write(UseDuplicateWordFilter ? 1 : 0); |
| | | bw.write(NumHelper.serialize(_blacklist.length)); |
| | | for (final int item : _blacklist) { |
| | | bw.write(NumHelper.serialize(item)); |
| | | } |
| | | |
| | | bw.write(UseDBCcaseConverter ? 1 : 0); |
| | | bw.write(UseIgnoreCase ? 1 : 0); |
| | | } |
| | | |
| | | /** |
| | | * Restores the settings written by Save, after the superclass has read its own data: |
| | | * skip-filter flag, skip table, duplicate-filter flag, blacklist, DBC flag, ignore-case flag. |
| | | * |
| | | * @param br stream to read from |
| | | * @throws IOException if reading fails |
| | | */ |
| | | public void Load(final InputStream br) throws IOException { |
| | | super.Load(br); |
| | | |
| | | UseSkipWordFilter = br.read() > 0; |
| | | final int skipCount = NumHelper.read(br); |
| | | _skipBitArray = new boolean[skipCount]; |
| | | for (int pos = 0; pos < skipCount; pos++) { |
| | | _skipBitArray[pos] = br.read() > 0; |
| | | } |
| | | |
| | | UseDuplicateWordFilter = br.read() > 0; |
| | | final int blacklistCount = NumHelper.read(br); |
| | | _blacklist = new int[blacklistCount]; |
| | | for (int pos = 0; pos < blacklistCount; pos++) { |
| | | _blacklist[pos] = NumHelper.read(br); |
| | | } |
| | | |
| | | UseDBCcaseConverter = br.read() > 0; |
| | | UseIgnoreCase = br.read() > 0; |
| | | } |
| | | |
| | | /** |
| | | * Finds every keyword occurrence in the text. |
| | | * |
| | | * The text is scanned once from left to right. For each character: |
| | | * 1. if UseSkipWordFilter is on, the character is skipped when the SkipWordFilter callback |
| | | * (or, without a callback, the skip table) says so; |
| | | * 2. the character is normalized by CharTranslate, or else by ToSenseWord when |
| | | * UseDBCcaseConverter or UseIgnoreCase is on; |
| | | * 3. the inherited lookup tables (_dict, _first, _min, _max, _nextIndex; presumably built by |
| | | * SetKeywords) give the next matcher state; a character whose _dict entry is 0 resets the state; |
| | | * 4. when no transition exists, UseDuplicateWordFilter is on and the character repeats the |
| | | * previous one, the state is kept, so repeated characters do not break a match; |
| | | * 5. when the new state has results (_end range non-empty) and CheckNextChar accepts the |
| | | * position, each candidate is resolved by GetGetIllegalResult and non-null results are collected. |
| | | * |
| | | * @param text the text to scan; must not be null |
| | | * @return all matches in the order they are found; empty when there is none |
| | | */ |
| | | public List<IllegalWordsSearchResult> FindAll(final String text) { |
| | | final List<IllegalWordsSearchResult> results = new ArrayList<IllegalWordsSearchResult>(); |
| | | int p = 0; |
| | | char pChar = (char) 0; |
| | | |
| | | for (int i = 0; i < text.length(); i++) { |
| | | char t1 = text.charAt(i); |
| | | if (UseSkipWordFilter) { |
| | | if (SkipWordFilter != null) {// custom skip-word callback decides |
| | | if (SkipWordFilter.apply(new SkipWordFilterHandler(t1, text, i))) { |
| | | continue; |
| | | } |
| | | } else if (_skipBitArray[t1]) { |
| | | continue; |
| | | } |
| | | } |
| | | |
| | | if (CharTranslate != null) { // character translation |
| | | t1 = CharTranslate.apply(new CharTranslateHandler(t1, text, i)); |
| | | } else if (UseDBCcaseConverter || UseIgnoreCase) { |
| | | t1 = ToSenseWord(t1); |
| | | } |
| | | final int t = _dict[t1]; |
| | | if (t == 0) { |
| | | pChar = t1; |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _first[t]; |
| | | } else { |
| | | final int index = _nextIndex[p].IndexOf(t); |
| | | if (index > -1) { |
| | | next = _nextIndex[p].GetValue(index); |
| | | } else if (UseDuplicateWordFilter && pChar == t1) { |
| | | next = p; |
| | | } else { |
| | | next = _first[t]; |
| | | } |
| | | } |
| | | |
| | | if (next != 0) { |
| | | if (_end[next] < _end[next + 1] && CheckNextChar(text, t1, i)) { |
| | | for (int j = _end[next]; j < _end[next + 1]; j++) { |
| | | final int index = _resultIndex[j]; |
| | | final IllegalWordsSearchResult r = GetGetIllegalResult(text, i, index); |
| | | if (r != null) { |
| | | results.add(r); |
| | | } |
| | | } |
| | | } |
| | | } |
| | | p = next; |
| | | pChar = t1; |
| | | } |
| | | return results; |
| | | } |
| | | |
| | | /** |
| | | * Finds the first keyword occurrence in the text. |
| | | * |
| | | * Runs the same left-to-right scan as FindAll (skip filter, character normalization, |
| | | * state transition through the inherited lookup tables, duplicate-character tolerance, |
| | | * CheckNextChar boundary test) but stops at the first candidate for which |
| | | * GetGetIllegalResult returns a non-null result. |
| | | * |
| | | * @param text the text to scan; must not be null |
| | | * @return the first match, or null when the text contains no keyword |
| | | */ |
| | | public IllegalWordsSearchResult FindFirst(final String text) { |
| | | int p = 0; |
| | | char pChar = (char) 0; |
| | | |
| | | for (int i = 0; i < text.length(); i++) { |
| | | char t1 = text.charAt(i); |
| | | if (UseSkipWordFilter) { |
| | | if (SkipWordFilter != null) {// custom skip-word callback decides |
| | | if (SkipWordFilter.apply(new SkipWordFilterHandler(t1, text, i))) { |
| | | continue; |
| | | } |
| | | } else if (_skipBitArray[t1]) { |
| | | continue; |
| | | } |
| | | } |
| | | |
| | | if (CharTranslate != null) { // character translation |
| | | t1 = CharTranslate.apply(new CharTranslateHandler(t1, text, i)); |
| | | } else if (UseDBCcaseConverter || UseIgnoreCase) { |
| | | t1 = ToSenseWord(t1); |
| | | } |
| | | final int t = _dict[t1]; |
| | | if (t == 0) { |
| | | pChar = t1; |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _first[t]; |
| | | } else { |
| | | final int index = _nextIndex[p].IndexOf(t); |
| | | if (index > -1) { |
| | | next = _nextIndex[p].GetValue(index); |
| | | } else if (UseDuplicateWordFilter && pChar == t1) { |
| | | next = p; |
| | | } else { |
| | | next = _first[t]; |
| | | } |
| | | } |
| | | |
| | | if (next != 0) { |
| | | if (_end[next] < _end[next + 1] && CheckNextChar(text, t1, i)) { |
| | | for (int j = _end[next]; j < _end[next + 1]; j++) { |
| | | final int index = _resultIndex[j]; |
| | | final IllegalWordsSearchResult r = GetGetIllegalResult(text, i, index); |
| | | if (r != null) { |
| | | return r; |
| | | } |
| | | } |
| | | } |
| | | } |
| | | p = next; |
| | | pChar = t1; |
| | | } |
| | | return null; |
| | | } |
| | | |
| | | /** |
| | | * Tells whether the text contains at least one keyword. |
| | | * |
| | | * Runs the same left-to-right scan as FindFirst (skip filter, character normalization, |
| | | * state transition through the inherited lookup tables, duplicate-character tolerance, |
| | | * CheckNextChar boundary test) and returns as soon as GetGetIllegalResult confirms a candidate. |
| | | * |
| | | * @param text the text to scan; must not be null |
| | | * @return true when a keyword is found, false otherwise |
| | | */ |
| | | public boolean ContainsAny(final String text) { |
| | | int p = 0; |
| | | char pChar = (char) 0; |
| | | |
| | | for (int i = 0; i < text.length(); i++) { |
| | | char t1 = text.charAt(i); |
| | | if (UseSkipWordFilter) { |
| | | if (SkipWordFilter != null) {// custom skip-word callback decides |
| | | if (SkipWordFilter.apply(new SkipWordFilterHandler(t1, text, i))) { |
| | | continue; |
| | | } |
| | | } else if (_skipBitArray[t1]) { |
| | | continue; |
| | | } |
| | | } |
| | | |
| | | if (CharTranslate != null) { // character translation |
| | | t1 = CharTranslate.apply(new CharTranslateHandler(t1, text, i)); |
| | | } else if (UseDBCcaseConverter || UseIgnoreCase) { |
| | | t1 = ToSenseWord(t1); |
| | | } |
| | | final int t = _dict[t1]; |
| | | if (t == 0) { |
| | | pChar = t1; |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _first[t]; |
| | | } else { |
| | | final int index = _nextIndex[p].IndexOf(t); |
| | | if (index > -1) { |
| | | next = _nextIndex[p].GetValue(index); |
| | | } else if (UseDuplicateWordFilter && pChar == t1) { |
| | | next = p; |
| | | } else { |
| | | next = _first[t]; |
| | | } |
| | | } |
| | | |
| | | if (next != 0) { |
| | | if (_end[next] < _end[next + 1] && CheckNextChar(text, t1, i)) { |
| | | for (int j = _end[next]; j < _end[next + 1]; j++) { |
| | | final int index = _resultIndex[j]; |
| | | final IllegalWordsSearchResult r = GetGetIllegalResult(text, i, index); |
| | | if (r != null) { |
| | | return true; |
| | | } |
| | | } |
| | | } |
| | | } |
| | | p = next; |
| | | pChar = t1; |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | /** |
| | | * Masks every keyword occurrence in the text with '*'. |
| | | * Convenience overload that delegates to Replace(text, '*'). |
| | | * |
| | | * @param text the text to scan; must not be null |
| | | * @return the text with every matched span overwritten by '*' |
| | | */ |
| | | public String Replace(final String text) { |
| | | return Replace(text, '*'); |
| | | } |
| | | |
| | | /** |
| | | * Masks every keyword occurrence in the text with the given character. |
| | | * |
| | | * Runs the same left-to-right scan as FindAll (skip filter, character normalization, |
| | | * state transition through the inherited lookup tables, duplicate-character tolerance, |
| | | * CheckNextChar boundary test). At each matching position only the first candidate confirmed |
| | | * by GetGetIllegalResult is used: every position from its Start to its End (inclusive, skipped |
| | | * characters included) is overwritten in a copy of the text. The input string is not modified. |
| | | * |
| | | * @param text the text to scan; must not be null |
| | | * @param replaceChar the character written over each position of a match |
| | | * @return a copy of the text with all matched spans masked |
| | | */ |
| | | public String Replace(final String text, final char replaceChar) { |
| | | final StringBuilder result = new StringBuilder(text); |
| | | |
| | | int p = 0; |
| | | char pChar = (char) 0; |
| | | |
| | | for (int i = 0; i < text.length(); i++) { |
| | | char t1 = text.charAt(i); |
| | | if (UseSkipWordFilter) { |
| | | if (SkipWordFilter != null) {// custom skip-word callback decides |
| | | if (SkipWordFilter.apply(new SkipWordFilterHandler(t1, text, i))) { |
| | | continue; |
| | | } |
| | | } else if (_skipBitArray[t1]) { |
| | | continue; |
| | | } |
| | | } |
| | | |
| | | if (CharTranslate != null) { // character translation |
| | | t1 = CharTranslate.apply(new CharTranslateHandler(t1, text, i)); |
| | | } else if (UseDBCcaseConverter || UseIgnoreCase) { |
| | | t1 = ToSenseWord(t1); |
| | | } |
| | | final int t = _dict[t1]; |
| | | if (t == 0) { |
| | | pChar = t1; |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _first[t]; |
| | | } else { |
| | | final int index = _nextIndex[p].IndexOf(t); |
| | | if (index > -1) { |
| | | next = _nextIndex[p].GetValue(index); |
| | | } else if (UseDuplicateWordFilter && pChar == t1) { |
| | | next = p; |
| | | } else { |
| | | next = _first[t]; |
| | | } |
| | | } |
| | | |
| | | if (next != 0) { |
| | | if (_end[next] < _end[next + 1] && CheckNextChar(text, t1, i)) { |
| | | for (int j = _end[next]; j < _end[next + 1]; j++) { |
| | | final int index = _resultIndex[j]; |
| | | final IllegalWordsSearchResult r = GetGetIllegalResult(text, i, index); |
| | | if (r != null) { |
| | | for (int k = r.Start; k <= r.End; k++) { |
| | | result.setCharAt(k, replaceChar); |
| | | } |
| | | break; |
| | | } |
| | | } |
| | | } |
| | | } |
| | | p = next; |
| | | pChar = t1; |
| | | } |
| | | return result.toString(); |
| | | } |
| | | |
| | | private boolean CheckNextChar(final String text, final char c, final int end) { |
| | | if (IsEnglishOrNumber(c) == false) { |
| | | return true; |
| | | } |
| | | if (end + 1 < text.length()) { |
| | | char e1 = text.charAt(end + 1); |
| | | if (UseSkipWordFilter) { |
| | | if (SkipWordFilter != null) {// 跳词跳过 |
| | | if (SkipWordFilter.apply(new SkipWordFilterHandler(e1, text, end + 1))) { |
| | | return true; |
| | | } |
| | | } else if (_skipBitArray[e1]) { |
| | | return true; |
| | | } |
| | | } |
| | | if (CharTranslate != null) { // 字符串转换 |
| | | e1 = CharTranslate.apply(new CharTranslateHandler(e1, text, end + 1)); |
| | | } else if (UseDBCcaseConverter || UseIgnoreCase) { |
| | | e1 = ToSenseWord(e1); |
| | | } |
| | | if (IsEnglishOrNumber(e1)) { |
| | | return false; |
| | | } |
| | | } |
| | | return true; |
| | | } |
| | | |
| | | private IllegalWordsSearchResult GetGetIllegalResult(String text, int end, int index) { /* Locates where the keyword at the given index starts in the text, walking backwards from the match end, then delegates to the five-argument overload, which may return null. */ |
| | | String key = _keywords[index]; |
| | | |
| | | int keyIndex = key.length() - 1; |
| | | int start = end; |
| | | for (int i = end; i >= 0; i--) { /* first pass: consume the keyword's characters from last to first */ |
| | | char s2 = text.charAt(i); |
| | | if (UseSkipWordFilter) { |
| | | if (SkipWordFilter != null) { |
| | | if (SkipWordFilter.apply(new SkipWordFilterHandler(s2, text, i))) { |
| | | continue; |
| | | } |
| | | } else if (_skipBitArray[s2]) { |
| | | continue; |
| | | } |
| | | } |
| | | |
| | | if (CharTranslate != null) { // character translation |
| | | s2 = CharTranslate.apply(new CharTranslateHandler(s2, text, i)); |
| | | } else if (UseDBCcaseConverter || UseIgnoreCase) { |
| | | s2 = ToSenseWord(s2); |
| | | } |
| | | if (s2 == key.charAt(keyIndex)) { |
| | | keyIndex--; |
| | | if (keyIndex == -1) { |
| | | start = i; |
| | | break; |
| | | } |
| | | } |
| | | } |
| | | for (int i = start; i >= 0; i--) { /* second pass: extend the start over a run of characters equal to the keyword's first character */ |
| | | char s2 = text.charAt(i); |
| | | if (CharTranslate != null) { // character translation |
| | | s2 = CharTranslate.apply(new CharTranslateHandler(s2, text, i)); |
| | | } else if (UseDBCcaseConverter || UseIgnoreCase) { |
| | | s2 = ToSenseWord(s2); |
| | | } |
| | | if (s2 != key.charAt(0)) { |
| | | break; |
| | | } |
| | | start = i; |
| | | } |
| | | return GetGetIllegalResult(text, key, start, end, index); |
| | | } |
| | | |
| | | /** |
| | | * Validates a located candidate and builds its result. |
| | | * A candidate that starts with an ASCII letter or digit is rejected when the character just |
| | | * before it is also an ASCII letter or digit (the match would start in the middle of a word). |
| | | * When a StringMatch callback is set, it has the final say. |
| | | * |
| | | * @param text the text being scanned |
| | | * @param key the configured keyword that matched |
| | | * @param start index of the first character of the match |
| | | * @param end index of the last character of the match (inclusive) |
| | | * @param index index of the keyword in the keyword array |
| | | * @return the match, or null when the candidate is rejected |
| | | */ |
| | | private IllegalWordsSearchResult GetGetIllegalResult(String text, String key, int start, int end, int index) { |
| | | if (start > 0) { |
| | | char s1 = text.charAt(start); |
| | | if (CharTranslate != null) { // character translation |
| | | s1 = CharTranslate.apply(new CharTranslateHandler(s1, text, start)); |
| | | } |
| | | if (IsEnglishOrNumber(s1)) { |
| | | char s2 = text.charAt(start - 1); |
| | | if (CharTranslate != null) { // character translation |
| | | s2 = CharTranslate.apply(new CharTranslateHandler(s2, text, start - 1)); |
| | | } else if (UseDBCcaseConverter || UseIgnoreCase) { |
| | | s2 = ToSenseWord(s2); |
| | | } |
| | | if (IsEnglishOrNumber(s2)) { |
| | | return null; |
| | | } |
| | | } |
| | | } |
| | | |
| | | final String keyword = text.substring(start, end + 1); |
| | | // The blacklist is empty until SetBlacklist is called, so fall back to 0 when it has no entry. |
| | | final int bl = _blacklist.length > index ? _blacklist[index] : 0; |
| | | // Pass the guarded value to the callback; indexing _blacklist directly would throw |
| | | // ArrayIndexOutOfBoundsException whenever no blacklist has been configured. |
| | | if (StringMatch != null |
| | | && !StringMatch.apply(new StringMatchHandler(text, start, end, keyword, index, key, bl))) { |
| | | return null; |
| | | } |
| | | return new IllegalWordsSearchResult(keyword, start, end, index, key, bl); |
| | | } |
| | | |
| | | /** |
| | | * Sets the blacklist type of every keyword. Must be called after SetKeywords. |
| | | * |
| | | * @param blacklist one entry per keyword, parallel to the keyword array |
| | | * @throws IllegalArgumentException when no keywords are set yet, or when the array length |
| | | * differs from the number of keywords |
| | | */ |
| | | public void SetBlacklist(final int[] blacklist) throws IllegalArgumentException { |
| | | final String problem; |
| | | if (_keywords == null) { |
| | | problem = "请先使用SetKeywords方法设置关键字!"; |
| | | } else if (blacklist.length != _keywords.length) { |
| | | problem = "请关键字与黑名单列表的长度要一样长!"; |
| | | } else { |
| | | problem = null; |
| | | } |
| | | if (problem != null) { |
| | | throw new IllegalArgumentException(problem); |
| | | } |
| | | _blacklist = blacklist; |
| | | } |
| | | |
| | | private Boolean IsEnglishOrNumber(final char c) { |
| | | if (c < 128) { |
| | | if ((c >= '0' && c <= '9') || (c >= 'a' && c <= 'z') || (c >= 'A' && c <= 'Z')) { |
| | | return true; |
| | | } |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | /** |
| | | * Normalizes every character of the text with the single-character ToSenseWord overload. |
| | | * |
| | | * @param text the text to normalize |
| | | * @return a new string of the same length holding the normalized characters |
| | | */ |
| | | private String ToSenseWord(final String text) { |
| | | final StringBuilder normalized = new StringBuilder(text.length()); |
| | | for (final char raw : text.toCharArray()) { |
| | | normalized.append(ToSenseWord(raw)); |
| | | } |
| | | return normalized.toString(); |
| | | } |
| | | |
| | | private Character ToSenseWord(final Character c) { |
| | | |
| | | if (UseIgnoreCase) { |
| | | if (c >= 'A' && c <= 'Z') |
| | | return (char) (c | 0x20); |
| | | } |
| | | if (UseDBCcaseConverter) { |
| | | if (c == 12288) |
| | | return ' '; |
| | | if (c >= 65280 && c < 65375) { |
| | | Character k = (char) (c - 65248); |
| | | if (UseIgnoreCase) { |
| | | if ('A' <= k && k <= 'Z') { |
| | | k = (char) (k | 0x20); |
| | | } |
| | | } |
| | | return (char) k; |
| | | } |
| | | } |
| | | return c; |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | |
| | | /** |
| | | * One keyword occurrence reported by IllegalWordsSearch. Plain data holder with public fields. |
| | | */ |
| | | public class IllegalWordsSearchResult |
| | | { |
| | | /** Index of the first character of the match in the scanned text. */ |
| | | public int Start; |
| | | /** Index of the last character of the match in the scanned text (inclusive). */ |
| | | public int End ; |
| | | /** The matched slice of the original text. */ |
| | | public String Keyword ; |
| | | /** The configured keyword that produced the match. */ |
| | | public String MatchKeyword; |
| | | /** Blacklist type associated with the keyword. */ |
| | | public int BlacklistType ; |
| | | /** Index of the keyword in the keyword array. */ |
| | | public int Index; |
| | | |
| | | public IllegalWordsSearchResult(final String keyword, final int start, final int end, final int index, |
| | | final String matchKeyword, final int type) |
| | | { |
| | | // Position of the match. |
| | | Start = start; |
| | | End = end; |
| | | // Matched text and the keyword it belongs to. |
| | | Keyword = keyword; |
| | | MatchKeyword = matchKeyword; |
| | | Index = index; |
| | | BlacklistType = type; |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import java.io.ByteArrayOutputStream; |
| | | import java.io.IOException; |
| | | import java.io.InputStream; |
| | | import java.util.HashMap; |
| | | import java.util.Map; |
| | | import java.util.concurrent.ConcurrentHashMap; |
| | | |
| | | /** |
| | | * Compact integer serialization: one type-flag byte followed by 1 to 4 big-endian payload |
| | | * bytes, using the smallest width that can hold the value. |
| | | * |
| | | * @author yongxuan.he |
| | | * @date 2020/3/17 |
| | | */ |
| | | public class NumHelper { |
| | | |
| | | /** Width classes of a serialized integer; the flag value is the payload size in bytes. */ |
| | | public enum SerializableType { |
| | | /** Integer in -128..127, one payload byte. */ |
| | | TINY_INT(1), |
| | | /** Integer in -32768..32767, two payload bytes. */ |
| | | SMALL_INT(2), |
| | | /** Integer in -8388608..8388607, three payload bytes. */ |
| | | MEDIUM_INT(3), |
| | | /** Any int, four payload bytes. */ |
| | | INT(4), |
| | | ; |
| | | |
| | | private final int flag; |
| | | |
| | | private static final Map<Integer, SerializableType> typeMap; |
| | | static { |
| | | Map<Integer, SerializableType> tmpMap = new HashMap<>(); |
| | | for (SerializableType type : SerializableType.values()) { |
| | | tmpMap.put(type.getFlag(), type); |
| | | } |
| | | typeMap = tmpMap; |
| | | } |
| | | |
| | | /** |
| | | * Looks up the type for a flag byte. |
| | | * |
| | | * @param flag the flag value read from a stream |
| | | * @return the matching type, or null when the flag is unknown |
| | | */ |
| | | public static SerializableType getType(int flag) { |
| | | return typeMap.get(flag); |
| | | } |
| | | |
| | | SerializableType(int flag) { |
| | | this.flag = flag; |
| | | } |
| | | |
| | | public int getFlag() { |
| | | return flag; |
| | | } |
| | | } |
| | | |
| | | /** Encodes the payload bytes (without the flag byte) of one integer. */ |
| | | private interface Serializer { |
| | | byte[] serialize(int a); |
| | | } |
| | | |
| | | private static final Serializer tinyIntWriter = v -> { |
| | | if (v < -128 || v > 127) { |
| | | throw new RuntimeException("not tinyInt: " + v); |
| | | } |
| | | ByteArrayOutputStream out = new ByteArrayOutputStream(); |
| | | out.write(v); |
| | | return out.toByteArray(); |
| | | }; |
| | | private static final Serializer smallIntWriter = v -> { |
| | | if (v < -32768 || v > 32767) { |
| | | throw new RuntimeException("not smallInt: " + v); |
| | | } |
| | | ByteArrayOutputStream out = new ByteArrayOutputStream(); |
| | | out.write((v >>> 8) & 0xFF); |
| | | out.write(v & 0xFF); |
| | | return out.toByteArray(); |
| | | }; |
| | | private static final Serializer mediumIntWriter = v -> { |
| | | if (v < -8388608 || v > 8388607) { |
| | | throw new RuntimeException("not mediumInt: " + v); |
| | | } |
| | | ByteArrayOutputStream out = new ByteArrayOutputStream(); |
| | | out.write((v >>> 16) & 0xFF); |
| | | out.write((v >>> 8) & 0xFF); |
| | | out.write(v & 0xFF); |
| | | return out.toByteArray(); |
| | | }; |
| | | |
| | | private static final Serializer intWriter = v -> { |
| | | ByteArrayOutputStream out = new ByteArrayOutputStream(); |
| | | out.write((v >>> 24) & 0xFF); |
| | | out.write((v >>> 16) & 0xFF); |
| | | out.write((v >>> 8) & 0xFF); |
| | | out.write(v & 0xFF); |
| | | return out.toByteArray(); |
| | | }; |
| | | |
| | | private static final Map<SerializableType, Serializer> simpleWriterMap = new ConcurrentHashMap<>(); |
| | | static { |
| | | simpleWriterMap.put(SerializableType.TINY_INT, tinyIntWriter); |
| | | simpleWriterMap.put(SerializableType.SMALL_INT, smallIntWriter); |
| | | simpleWriterMap.put(SerializableType.MEDIUM_INT, mediumIntWriter); |
| | | simpleWriterMap.put(SerializableType.INT, intWriter); |
| | | } |
| | | |
| | | /** |
| | | * Serializes an int as a flag byte followed by the smallest big-endian payload that holds it. |
| | | * |
| | | * @param v the value to serialize |
| | | * @return 2 to 5 bytes that read(InputStream) decodes back to v |
| | | */ |
| | | public static byte[] serialize(int v) { |
| | | final SerializableType type; |
| | | if(v >= -128 && v<= 127) { |
| | | type = SerializableType.TINY_INT; |
| | | } else if(v >= -32768 && v <= 32767) { |
| | | type = SerializableType.SMALL_INT; |
| | | } else if(v >= -8388608 && v <= 8388607){ |
| | | type = SerializableType.MEDIUM_INT; |
| | | } else { |
| | | type = SerializableType.INT; |
| | | } |
| | | ByteArrayOutputStream out = new ByteArrayOutputStream(); |
| | | out.write(type.getFlag()); |
| | | byte[] bytes = simpleWriterMap.get(type).serialize(v); |
| | | out.write(bytes, 0, bytes.length); |
| | | return out.toByteArray(); |
| | | } |
| | | |
| | | /** Decodes the payload bytes (the flag byte has already been consumed) of one integer. */ |
| | | public interface Deserializer { |
| | | int deserialize(InputStream in) throws IOException; |
| | | } |
| | | |
| | | private static final Deserializer tinyIntReader = in -> { |
| | | int ch = in.read(); |
| | | if (ch < 0) |
| | | throw new RuntimeException("deserializing"); |
| | | // Sign-extend from 8 bits. |
| | | if ((0x80 & ch) == 0x80) { |
| | | ch = 0xffffff00 | ch; |
| | | } |
| | | return ch; |
| | | }; |
| | | private static final Deserializer smallIntReader = in -> { |
| | | int ch1 = in.read(); |
| | | int ch2 = in.read(); |
| | | if ((ch1 | ch2) < 0) |
| | | throw new RuntimeException("deserializing"); |
| | | int ch = (ch1 << 8) + ch2; |
| | | // Sign-extend from 16 bits. |
| | | if ((0x8000 & ch) == 0x8000) { |
| | | ch = 0xffff0000 | ch; |
| | | } |
| | | return ch; |
| | | }; |
| | | private static final Deserializer mediumIntReader = in -> { |
| | | int ch1 = in.read(); |
| | | int ch2 = in.read(); |
| | | int ch3 = in.read(); |
| | | if ((ch1 | ch2 | ch3) < 0) |
| | | throw new RuntimeException("deserializing"); |
| | | int ch = (ch1 << 16) + (ch2 << 8) + (ch3); |
| | | // Sign-extend from 24 bits. |
| | | if ((0x800000 & ch) == 0x800000) { |
| | | ch = 0xff000000 | ch; |
| | | } |
| | | return ch; |
| | | }; |
| | | |
| | | private static final Deserializer intReader = in -> { |
| | | int ch1 = in.read(); |
| | | int ch2 = in.read(); |
| | | int ch3 = in.read(); |
| | | int ch4 = in.read(); |
| | | if ((ch1 | ch2 | ch3 | ch4) < 0) |
| | | throw new RuntimeException("deserializing"); |
| | | return ((ch1 << 24) + (ch2 << 16) + (ch3 << 8) + ch4); |
| | | }; |
| | | |
| | | private static final Map<SerializableType, Deserializer> simpleReaderMap = new ConcurrentHashMap<>(); |
| | | static { |
| | | simpleReaderMap.put(SerializableType.TINY_INT, tinyIntReader); |
| | | simpleReaderMap.put(SerializableType.SMALL_INT, smallIntReader); |
| | | simpleReaderMap.put(SerializableType.MEDIUM_INT, mediumIntReader); |
| | | simpleReaderMap.put(SerializableType.INT, intReader); |
| | | } |
| | | |
| | | /** |
| | | * Reads one integer written by serialize(int). |
| | | * |
| | | * @param in the stream positioned at a flag byte |
| | | * @return the decoded value |
| | | * @throws IOException if the stream fails |
| | | * @throws RuntimeException with message "wrong flag: N" when the flag byte is unknown or the |
| | | * stream is already at its end (N is -1 then), or "deserializing" when the payload is truncated |
| | | */ |
| | | public static int read(InputStream in) throws IOException { |
| | | int flag = in.read(); |
| | | SerializableType type = SerializableType.getType(flag); |
| | | // ConcurrentHashMap rejects null keys, so an unknown flag must be caught before the lookup; |
| | | // otherwise the caller gets a bare NullPointerException instead of the intended message. |
| | | Deserializer deserializer = type == null ? null : simpleReaderMap.get(type); |
| | | |
| | | if(deserializer == null) { |
| | | throw new RuntimeException("wrong flag: " + flag); |
| | | } |
| | | return deserializer.deserialize(in); |
| | | } |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springblade.modules.words.internals.BasePinyinMatch; |
| | | import org.springblade.modules.words.internals.PinyinDict; |
| | | import org.springblade.modules.words.internals.TwoTuple; |
| | | |
| | | import java.io.IOException; |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | |
| | | public class PinyinMatch extends BasePinyinMatch { |
| | | private String[] _keywords; |
| | | private String[] _keywordsFirstPinyin; |
| | | private String[][] _keywordsPinyin; |
| | | private int[] _indexs; |
| | | |
| | | /** |
| | | * Installs a new keyword list and computes the pinyin of every keyword through |
| | | * {@code PinyinDict.GetPinyinList}. Note: any previously configured indexes are cleared. |
| | | * |
| | | * @param keywords the keywords that later searches run against |
| | | * @throws NumberFormatException declared on the signature; presumably raised by the dictionary lookup |
| | | * @throws IOException declared on the signature; presumably raised by the dictionary lookup |
| | | */ |
| | | public void SetKeywords(final List<String> keywords) throws NumberFormatException, IOException { |
| | | _keywords = keywords.toArray(new String[0]); |
| | | final int total = _keywords.length; |
| | | _keywordsFirstPinyin = new String[total]; |
| | | _keywordsPinyin = new String[total][]; |
| | | for (int k = 0; k < total; k++) { |
| | | final String[] syllables = PinyinDict.GetPinyinList(_keywords[k], 0); |
| | | // Upper-case each syllable in place and collect its first letter. |
| | | final StringBuilder initials = new StringBuilder(); |
| | | for (int s = 0; s < syllables.length; s++) { |
| | | final String upper = syllables[s].toUpperCase(); |
| | | syllables[s] = upper; |
| | | initials.append(upper.charAt(0)); |
| | | } |
| | | _keywordsPinyin[k] = syllables; |
| | | _keywordsFirstPinyin[k] = initials.toString(); |
| | | } |
| | | _indexs = null; |
| | | } |
| | | |
| | | /** |
| | | * Installs keywords together with caller-supplied pinyin whose syllables are separated by commas. |
| | | * Delegates to the three-argument overload, which also clears the indexes. |
| | | * |
| | | * @param keywords the keywords that later searches run against |
| | | * @param pinyin the pinyin of each keyword, at the same position as the keyword |
| | | */ |
| | | public void SetKeywords(final List<String> keywords, final List<String> pinyin) { |
| | | final char commaSeparator = ','; |
| | | SetKeywords(keywords, pinyin, commaSeparator); |
| | | } |
| | | |
| | | /** |
| | | * Installs keywords together with caller-supplied pinyin. Note: any previously configured |
| | | * indexes are cleared. |
| | | * |
| | | * @param keywords the keywords that later searches run against |
| | | * @param pinyin the pinyin of each keyword, at the same position as the keyword; must hold |
| | | * at least as many entries as {@code keywords} |
| | | * @param splitChar the character separating the syllables inside each pinyin entry; it is |
| | | * matched literally, so characters such as '|' or '.' are safe |
| | | */ |
| | | public void SetKeywords(final List<String> keywords, final List<String> pinyin, final char splitChar) { |
| | | _keywords = keywords.toArray(new String[0]); |
| | | _keywordsFirstPinyin = new String[_keywords.length]; |
| | | _keywordsPinyin = new String[_keywords.length][]; |
| | | // String.split interprets its argument as a regular expression; quote the separator so |
| | | // that regex metacharacters are treated as plain text. |
| | | final String separator = java.util.regex.Pattern.quote(String.valueOf(splitChar)); |
| | | for (int i = 0; i < _keywords.length; i++) { |
| | | final String[] pys = pinyin.get(i).split(separator); |
| | | final StringBuilder fpy = new StringBuilder(pys.length); |
| | | for (int j = 0; j < pys.length; j++) { |
| | | pys[j] = pys[j].toUpperCase(); |
| | | fpy.append(pys[j].charAt(0)); |
| | | } |
| | | _keywordsPinyin[i] = pys; |
| | | _keywordsFirstPinyin[i] = fpy.toString(); |
| | | } |
| | | _indexs = null; |
| | | } |
| | | |
| | | /** |
| | | * Installs the index values that {@code FindIndex} and {@code FindIndexWithSpace} report |
| | | * instead of keyword positions. |
| | | * |
| | | * @param indexs one value per keyword, at the same position as the keyword; may be longer |
| | | * than the keyword list but not shorter |
| | | * @throws Exception if no keywords have been set yet, or if {@code indexs} has fewer entries |
| | | * than there are keywords |
| | | */ |
| | | public void SetIndexs(final List<Integer> indexs) throws Exception { |
| | | if (_keywords == null) { |
| | | throw new Exception("请先使用 SetKeywords 方法"); |
| | | } |
| | | // The rejected case is a list that is too short, and the message now says so. |
| | | if (indexs.size() < _keywords.length) { |
| | | throw new Exception("indexs 数组长度小于 keywords"); |
| | | } |
| | | _indexs = new int[indexs.size()]; |
| | | for (int i = 0; i < indexs.size(); i++) { |
| | | _indexs[i] = indexs.get(i); |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * Returns every keyword matched by {@code key}. |
| | | * <p> |
| | | * The key is upper-cased and trimmed. If it then contains no letter A-Z, keywords are matched |
| | | * by plain substring containment; otherwise the key is expanded into pinyin candidates and |
| | | * matched against each keyword's pinyin through {@code PinyinSearch}. |
| | | * |
| | | * @param key the search text; may be {@code null} |
| | | * @return the matching keywords in keyword order, or {@code null} when {@code key} is |
| | | * {@code null} or empty after trimming |
| | | * @throws NumberFormatException declared on the signature; presumably raised by the pinyin helpers |
| | | * @throws IOException declared on the signature; presumably raised by the pinyin helpers |
| | | */ |
| | | public List<String> Find(String key) throws NumberFormatException, IOException { |
| | | // Null has to be rejected before the first method call on key. |
| | | if (key == null) { |
| | | return null; |
| | | } |
| | | key = key.toUpperCase().trim(); |
| | | if (key.isEmpty()) { |
| | | return null; |
| | | } |
| | | |
| | | final boolean hasPinyin = key.matches("^.*?[A-Z]+.*$"); |
| | | if (!hasPinyin) { |
| | | final List<String> rs = new ArrayList<String>(); |
| | | for (final String keyword : _keywords) { |
| | | if (keyword.contains(key)) { |
| | | rs.add(keyword); |
| | | } |
| | | } |
| | | return rs; |
| | | } |
| | | |
| | | final List<String> pykeys = SplitKeywords(key); |
| | | int minLength = Integer.MAX_VALUE; |
| | | final List<TwoTuple<String, String[]>> list = new ArrayList<TwoTuple<String, String[]>>(); |
| | | // Each candidate produced by SplitKeywords is a sequence of parts joined by the NUL character. |
| | | final String partSeparator = String.valueOf((char) 0); |
| | | for (final String pykey : pykeys) { |
| | | final String[] keys = pykey.split(partSeparator); |
| | | minLength = Math.min(minLength, keys.length); |
| | | MergeKeywords(keys, 0, "", list); |
| | | } |
| | | |
| | | final PinyinSearch search = new PinyinSearch(); |
| | | search.SetKeywords2(list); |
| | | final List<String> result = new ArrayList<String>(); |
| | | for (int i = 0; i < _keywords.length; i++) { |
| | | final String keywords = _keywords[i]; |
| | | // A keyword with fewer characters than the shortest candidate has parts is skipped. |
| | | if (keywords.length() < minLength) { |
| | | continue; |
| | | } |
| | | if (search.Find(_keywordsFirstPinyin[i], keywords, _keywordsPinyin[i])) { |
| | | result.add(keywords); |
| | | } |
| | | } |
| | | return result; |
| | | } |
| | | |
| | | /** |
| | | * Returns the index of every keyword matched by {@code key}. |
| | | * <p> |
| | | * Matching follows the same rules as {@code Find}. For each hit the reported value is the |
| | | * keyword's position, or the value installed at that position by {@code SetIndexs} when |
| | | * indexes have been set. |
| | | * |
| | | * @param key the search text; may be {@code null} |
| | | * @return the indexes of the matching keywords in keyword order, or {@code null} when |
| | | * {@code key} is {@code null} or empty after trimming |
| | | * @throws NumberFormatException declared on the signature; presumably raised by the pinyin helpers |
| | | * @throws IOException declared on the signature; presumably raised by the pinyin helpers |
| | | */ |
| | | public List<Integer> FindIndex(String key) throws NumberFormatException, IOException { |
| | | // Null has to be rejected before the first method call on key. |
| | | if (key == null) { |
| | | return null; |
| | | } |
| | | key = key.toUpperCase().trim(); |
| | | if (key.isEmpty()) { |
| | | return null; |
| | | } |
| | | final boolean hasPinyin = key.matches("^.*?[A-Z]+.*$"); |
| | | if (!hasPinyin) { |
| | | final List<Integer> rs = new ArrayList<Integer>(); |
| | | for (int i = 0; i < _keywords.length; i++) { |
| | | if (_keywords[i].contains(key)) { |
| | | rs.add(_indexs == null ? i : _indexs[i]); |
| | | } |
| | | } |
| | | return rs; |
| | | } |
| | | |
| | | final List<String> pykeys = SplitKeywords(key); |
| | | int minLength = Integer.MAX_VALUE; |
| | | final List<TwoTuple<String, String[]>> list = new ArrayList<TwoTuple<String, String[]>>(); |
| | | // Each candidate produced by SplitKeywords is a sequence of parts joined by the NUL character. |
| | | final String partSeparator = String.valueOf((char) 0); |
| | | for (final String pykey : pykeys) { |
| | | final String[] keys = pykey.split(partSeparator); |
| | | minLength = Math.min(minLength, keys.length); |
| | | MergeKeywords(keys, 0, "", list); |
| | | } |
| | | |
| | | final PinyinSearch search = new PinyinSearch(); |
| | | search.SetKeywords2(list); |
| | | final List<Integer> result = new ArrayList<Integer>(); |
| | | for (int i = 0; i < _keywords.length; i++) { |
| | | final String keywords = _keywords[i]; |
| | | // A keyword with fewer characters than the shortest candidate has parts is skipped. |
| | | if (keywords.length() < minLength) { |
| | | continue; |
| | | } |
| | | if (search.Find(_keywordsFirstPinyin[i], keywords, _keywordsPinyin[i])) { |
| | | result.add(_indexs == null ? i : _indexs[i]); |
| | | } |
| | | } |
| | | return result; |
| | | } |
| | | |
| | | /** |
| | | * Returns every keyword matched by {@code keywords}, where spaces act as wildcards between |
| | | * the sub-keys. A query without any space is handled by {@code Find}. |
| | | * |
| | | * @param keywords the search text; may be {@code null}. Runs of consecutive spaces count as |
| | | * a single separator. |
| | | * @return the matching keywords in keyword order, or {@code null} when {@code keywords} is |
| | | * {@code null} or empty after trimming |
| | | * @throws NumberFormatException declared on the signature; presumably raised by the pinyin helpers |
| | | * @throws IOException declared on the signature; presumably raised by the pinyin helpers |
| | | */ |
| | | public List<String> FindWithSpace(String keywords) throws NumberFormatException, IOException { |
| | | // Null has to be rejected before the first method call on keywords. |
| | | if (keywords == null) { |
| | | return null; |
| | | } |
| | | keywords = keywords.toUpperCase().trim(); |
| | | if (keywords.isEmpty()) { |
| | | return null; |
| | | } |
| | | if (!keywords.contains(" ")) { |
| | | return Find(keywords); |
| | | } |
| | | |
| | | final List<TwoTuple<String, String[]>> list = new ArrayList<TwoTuple<String, String[]>>(); |
| | | final List<Integer> indexs = new ArrayList<Integer>(); |
| | | // Splitting on runs of spaces keeps "a  b" from producing an empty sub-key between the two |
| | | // spaces; the text is already trimmed, so no empty leading element can appear either. |
| | | final String[] keys = keywords.split(" +"); |
| | | final int keysCount = keys.length; |
| | | final String partSeparator = String.valueOf((char) 0); |
| | | int minLength = 0; |
| | | for (int i = 0; i < keys.length; i++) { |
| | | final List<String> pykeys = SplitKeywords(keys[i]); |
| | | int min = Integer.MAX_VALUE; |
| | | for (final String pykey : pykeys) { |
| | | final String[] keys2 = pykey.split(partSeparator); |
| | | min = Math.min(min, keys2.length); |
| | | // i records which sub-key each merged candidate belongs to. |
| | | MergeKeywords(keys2, 0, "", list, i, indexs); |
| | | } |
| | | minLength += min; |
| | | } |
| | | |
| | | final PinyinSearch search = new PinyinSearch(); |
| | | search.SetKeywords2(list); |
| | | search.SetIndexs(indexs); |
| | | |
| | | final List<String> result = new ArrayList<String>(); |
| | | for (int i = 0; i < _keywords.length; i++) { |
| | | final String keywords2 = _keywords[i]; |
| | | // A keyword shorter than the summed minimum part counts of all sub-keys is skipped. |
| | | if (keywords2.length() < minLength) { |
| | | continue; |
| | | } |
| | | if (search.Find2(_keywordsFirstPinyin[i], keywords2, _keywordsPinyin[i], keysCount)) { |
| | | result.add(keywords2); |
| | | } |
| | | } |
| | | return result; |
| | | } |
| | | |
| | | /** |
| | | * 查询索引号,空格为通配符 |
| | | * |
| | | * @param keywords |
| | | * @return |
| | | * @throws NumberFormatException |
| | | * @throws IOException |
| | | */ |
| | | public List<Integer> FindIndexWithSpace(String keywords) throws NumberFormatException, IOException { |
| | | keywords = keywords.toUpperCase().trim(); |
| | | if (keywords == null || keywords.equals("")) { |
| | | return null; |
| | | } |
| | | if (keywords.contains(" ") == false) { |
| | | return FindIndex(keywords); |
| | | } |
| | | |
| | | final List<TwoTuple<String, String[]>> list = new ArrayList<TwoTuple<String, String[]>>(); |
| | | final List<Integer> indexs = new ArrayList<Integer>(); |
| | | int minLength = 0; |
| | | int keysCount; |
| | | { |
| | | final String[] keys = keywords.split(" "); |
| | | keysCount = keys.length; |
| | | for (int i = 0; i < keys.length; i++) { |
| | | final String key = keys[i]; |
| | | final List<String> pykeys = SplitKeywords(key); |
| | | int min = Integer.MAX_VALUE; |
| | | for (final String pykey : pykeys) { |
| | | final String[] keys2 = pykey.split(((Character) (char) 0).toString()); |
| | | if (min > keys2.length) { |
| | | min = keys2.length; |
| | | } |
| | | MergeKeywords(keys2, 0, "", list, i, indexs); |
| | | } |
| | | minLength += min; |
| | | } |
| | | } |
| | | |
| | | final PinyinSearch search = new PinyinSearch(); |
| | | search.SetKeywords2(list); |
| | | search.SetIndexs(indexs); |
| | | |
| | | final List<Integer> result = new ArrayList<Integer>(); |
| | | for (int i = 0; i < _keywords.length; i++) { |
| | | final String keywords2 = _keywords[i]; |
| | | if (keywords2.length() < minLength) { |
| | | continue; |
| | | } |
| | | final String fpy = _keywordsFirstPinyin[i]; |
| | | final String[] pylist = _keywordsPinyin[i]; |
| | | if (search.Find2(fpy, keywords2, pylist, keysCount)) { |
| | | if (_indexs == null) { |
| | | result.add(i); |
| | | } else { |
| | | result.add(_indexs[i]); |
| | | } |
| | | } |
| | | } |
| | | return result; |
| | | } |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springblade.modules.words.internals.BasePinyinMatch; |
| | | import org.springblade.modules.words.internals.PinyinDict; |
| | | import org.springblade.modules.words.internals.TwoTuple; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | import java.util.function.Function; |
| | | |
| | | public class PinyinMatch2<T> extends BasePinyinMatch { |
| | | private final List<T> _list; |
| | | private Function<T, String> _keywordsFunc; |
| | | private Function<T, String> _pinyinFunc; |
| | | private char _splitChar = ','; |
| | | |
| | | /** |
| | | * Creates a pinyin matcher over {@code list}. Characters in [0x20000-0x2B81D] are not supported. |
| | | * The keyword and pinyin accessor functions start out unset. |
| | | * |
| | | * @param list the items to search; the reference is kept, not copied |
| | | */ |
| | | public PinyinMatch2(final List<T> list) { |
| | | this._pinyinFunc = null; |
| | | this._keywordsFunc = null; |
| | | this._list = list; |
| | | } |
| | | |
| | | /** |
| | | * Sets the function that extracts the keyword text from an item. |
| | | * |
| | | * @param keywordsFunc maps an item to its keyword |
| | | */ |
| | | public void SetKeywordsFunc(final Function<T, String> keywordsFunc) { |
| | | this._keywordsFunc = keywordsFunc; |
| | | } |
| | | |
| | | /** |
| | | * Sets the function that extracts the pinyin text from an item. |
| | | * |
| | | * @param pinyinFunc maps an item to its pinyin, with syllables joined by the split character |
| | | */ |
| | | public void SetPinyinFunc(final Function<T, String> pinyinFunc) { |
| | | this._pinyinFunc = pinyinFunc; |
| | | } |
| | | |
| | | /** |
| | | * Sets the character that separates the syllables in the text returned by the pinyin function. |
| | | * |
| | | * @param splitChar the separator character |
| | | */ |
| | | public void SetPinyinSplitChar(final char splitChar) { |
| | | this._splitChar = splitChar; |
| | | } |
| | | |
| | | /** |
| | | * 查询 |
| | | * |
| | | * @param keywords |
| | | * @return |
| | | * @throws Exception |
| | | */ |
| | | public List<T> Find(String keywords) throws Exception { |
| | | if (_keywordsFunc == null) { |
| | | throw new Exception("请先使用SetKeywordsFunc方法。"); |
| | | } |
| | | keywords = keywords.toUpperCase().trim(); |
| | | if (keywords == null || keywords.equals("")) { |
| | | return null; |
| | | } |
| | | final List<T> result = new ArrayList<T>(); |
| | | final boolean hasPinyin = keywords.matches("^.*?[A-Z]+.*$");// Pattern.matches("[a-zA-Z]",key); |
| | | if (hasPinyin == false) { |
| | | for (final T item : _list) { |
| | | final String keyword = _keywordsFunc.apply(item); |
| | | if (keyword.contains(keywords)) { |
| | | result.add(item); |
| | | } |
| | | } |
| | | return result; |
| | | } |
| | | |
| | | final List<String> pykeys = SplitKeywords(keywords); |
| | | int minLength = Integer.MAX_VALUE; |
| | | final List<TwoTuple<String, String[]>> list = new ArrayList<TwoTuple<String, String[]>>(); |
| | | for (final String pykey : pykeys) { |
| | | final String[] keys = pykey.split(((Character) (char) 0).toString()); |
| | | if (minLength > keys.length) { |
| | | minLength = keys.length; |
| | | } |
| | | MergeKeywords(keys, 0, "", list); |
| | | } |
| | | |
| | | final PinyinSearch search = new PinyinSearch(); |
| | | search.SetKeywords2(list); |
| | | for (final T item : _list) { |
| | | final String keyword = _keywordsFunc.apply(item); |
| | | if (keyword.length() < minLength) { |
| | | continue; |
| | | } |
| | | String fpy = ""; |
| | | String[] pylist; |
| | | if (_pinyinFunc == null) { |
| | | pylist = PinyinDict.GetPinyinList(keyword, 0); |
| | | } else { |
| | | pylist = _pinyinFunc.apply(item).split(((Character) _splitChar).toString()); |
| | | } |
| | | for (int j = 0; j < pylist.length; j++) { |
| | | pylist[j] = pylist[j].toUpperCase(); |
| | | fpy += pylist[j].charAt(0); |
| | | } |
| | | if (search.Find(fpy, keyword, pylist)) { |
| | | result.add(item); |
| | | } |
| | | } |
| | | return result; |
| | | } |
| | | |
| | | /** |
| | | * Returns every item whose keyword is matched by {@code keywords}, where spaces act as |
| | | * wildcards between the sub-keys. A query without any space is handled by {@code Find}. |
| | | * |
| | | * @param keywords the search text; may be {@code null}. Runs of consecutive spaces count as |
| | | * a single separator. |
| | | * @return the matching items in list order, or {@code null} when {@code keywords} is |
| | | * {@code null} or empty after trimming |
| | | * @throws Exception if no keyword function has been set |
| | | */ |
| | | public List<T> FindWithSpace(String keywords) throws Exception { |
| | | if (_keywordsFunc == null) { |
| | | throw new Exception("请先使用SetKeywordsFunc方法。"); |
| | | } |
| | | // Null has to be rejected before the first method call on keywords. |
| | | if (keywords == null) { |
| | | return null; |
| | | } |
| | | keywords = keywords.toUpperCase().trim(); |
| | | if (keywords.isEmpty()) { |
| | | return null; |
| | | } |
| | | if (!keywords.contains(" ")) { |
| | | return Find(keywords); |
| | | } |
| | | |
| | | final List<TwoTuple<String, String[]>> list = new ArrayList<TwoTuple<String, String[]>>(); |
| | | final List<Integer> indexs = new ArrayList<Integer>(); |
| | | // Splitting on runs of spaces keeps "a  b" from producing an empty sub-key between the two |
| | | // spaces; the text is already trimmed, so no empty leading element can appear either. |
| | | final String[] keys = keywords.split(" +"); |
| | | final int keysCount = keys.length; |
| | | final String partSeparator = String.valueOf((char) 0); |
| | | int minLength = 0; |
| | | for (int i = 0; i < keys.length; i++) { |
| | | final List<String> pykeys = SplitKeywords(keys[i]); |
| | | int min = Integer.MAX_VALUE; |
| | | for (final String pykey : pykeys) { |
| | | final String[] keys2 = pykey.split(partSeparator); |
| | | min = Math.min(min, keys2.length); |
| | | // i records which sub-key each merged candidate belongs to. |
| | | MergeKeywords(keys2, 0, "", list, i, indexs); |
| | | } |
| | | minLength += min; |
| | | } |
| | | |
| | | final PinyinSearch search = new PinyinSearch(); |
| | | search.SetKeywords2(list); |
| | | search.SetIndexs(indexs); |
| | | |
| | | // String.split interprets its argument as a regular expression; quote the separator so |
| | | // that a split character such as '|' or '.' is treated as plain text. |
| | | final String syllableSeparator = java.util.regex.Pattern.quote(String.valueOf(_splitChar)); |
| | | final List<T> result = new ArrayList<T>(); |
| | | for (final T item : _list) { |
| | | final String keyword = _keywordsFunc.apply(item); |
| | | if (keyword.length() < minLength) { |
| | | continue; |
| | | } |
| | | final String[] pylist; |
| | | if (_pinyinFunc == null) { |
| | | pylist = PinyinDict.GetPinyinList(keyword, 0); |
| | | } else { |
| | | pylist = _pinyinFunc.apply(item).split(syllableSeparator); |
| | | } |
| | | // Upper-case each syllable in place and collect its first letter. |
| | | final StringBuilder fpy = new StringBuilder(pylist.length); |
| | | for (int j = 0; j < pylist.length; j++) { |
| | | pylist[j] = pylist[j].toUpperCase(); |
| | | fpy.append(pylist[j].charAt(0)); |
| | | } |
| | | if (search.Find2(fpy.toString(), keyword, pylist, keysCount)) { |
| | | result.add(item); |
| | | } |
| | | } |
| | | return result; |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springblade.modules.words.internals.BaseMatch; |
| | | import org.springblade.modules.words.internals.TrieNode3; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | |
| | | /** |
| | | * 文本搜索匹配, ,支持 部分 正则 如 . ? [ ] \ ( | ) ,不支持( )内再嵌套( ) |
| | | */ |
| | | public class StringMatch extends BaseMatch { |
| | | |
| | | /** |
| | | * Finds the first keyword occurrence in the text. |
| | | * |
| | | * @param text the text to scan |
| | | * @return the matched substring of {@code text}, or {@code null} when the scan ends without a hit |
| | | */ |
| | | public String FindFirst(final String text) { |
| | | TrieNode3 current = null; |
| | | for (int pos = 0, n = text.length(); pos < n; pos++) { |
| | | final char ch = text.charAt(pos); |
| | | |
| | | final TrieNode3 next; |
| | | if (current != null && current.HasKey(ch)) { |
| | | next = current.GetValue(ch); |
| | | } else { |
| | | // No transition from the current node: try its wildcard branch on the rest of |
| | | // the text, then restart from the first-character table. |
| | | if (current != null && current.HasWildcard) { |
| | | final String viaWildcard = FindFirst(text, pos + 1, current.WildcardNode); |
| | | if (viaWildcard != null) { |
| | | return viaWildcard; |
| | | } |
| | | } |
| | | next = _first[ch]; |
| | | } |
| | | if (next != null && next.End) { |
| | | final int begin = pos + 1 - _keywordLength[next.Results.get(0)]; |
| | | if (begin >= 0) { |
| | | return text.substring(begin, pos + 1); |
| | | } |
| | | } |
| | | current = next; |
| | | } |
| | | return null; |
| | | } |
| | | |
| | | // Continues a first-match scan from position index, starting at node ptr. Unlike the public |
| | | // overload it gives up (after trying the wildcard branch) as soon as a character has no transition. |
| | | private String FindFirst(final String text, final int index, TrieNode3 ptr) { |
| | | TrieNode3 node = ptr; |
| | | for (int pos = index, n = text.length(); pos < n; pos++) { |
| | | final char ch = text.charAt(pos); |
| | | if (!node.HasKey(ch)) { |
| | | return node.HasWildcard ? FindFirst(text, pos + 1, node.WildcardNode) : null; |
| | | } |
| | | final TrieNode3 next = node.GetValue(ch); |
| | | |
| | | if (next.End) { |
| | | final int begin = pos + 1 - _keywordLength[next.Results.get(0)]; |
| | | if (begin >= 0) { |
| | | return text.substring(begin, pos + 1); |
| | | } |
| | | } |
| | | node = next; |
| | | } |
| | | return null; |
| | | } |
| | | |
| | | /** |
| | | * Finds all keyword occurrences in the text. |
| | | * |
| | | * @param text the text to scan |
| | | * @return the matched substrings of {@code text} in the order they are found; never {@code null} |
| | | */ |
| | | public List<String> FindAll(final String text) { |
| | | final List<String> matches = new ArrayList<String>(); |
| | | TrieNode3 current = null; |
| | | |
| | | for (int pos = 0, n = text.length(); pos < n; pos++) { |
| | | final char ch = text.charAt(pos); |
| | | final TrieNode3 next; |
| | | if (current != null && current.HasKey(ch)) { |
| | | next = current.GetValue(ch); |
| | | } else { |
| | | // No transition from the current node: collect hits along its wildcard branch, |
| | | // then restart from the first-character table. |
| | | if (current != null && current.HasWildcard) { |
| | | FindAll(text, pos + 1, current.WildcardNode, matches); |
| | | } |
| | | next = _first[ch]; |
| | | } |
| | | if (next != null && next.End) { |
| | | for (final Integer keywordIndex : next.Results) { |
| | | final int begin = pos + 1 - _keywordLength[keywordIndex]; |
| | | if (begin >= 0) { |
| | | matches.add(text.substring(begin, pos + 1)); |
| | | } |
| | | } |
| | | } |
| | | current = next; |
| | | } |
| | | return matches; |
| | | } |
| | | |
| | | // Continues an all-matches scan from position index, starting at node ptr, appending hits to |
| | | // result. Stops (after trying the wildcard branch) as soon as a character has no transition. |
| | | private void FindAll(final String text, final int index, TrieNode3 ptr, final List<String> result) { |
| | | TrieNode3 node = ptr; |
| | | for (int pos = index, n = text.length(); pos < n; pos++) { |
| | | final char ch = text.charAt(pos); |
| | | if (!node.HasKey(ch)) { |
| | | if (node.HasWildcard) { |
| | | FindAll(text, pos + 1, node.WildcardNode, result); |
| | | } |
| | | return; |
| | | } |
| | | final TrieNode3 next = node.GetValue(ch); |
| | | if (next.End) { |
| | | for (final Integer keywordIndex : next.Results) { |
| | | final int begin = pos + 1 - _keywordLength[keywordIndex]; |
| | | if (begin >= 0) { |
| | | result.add(text.substring(begin, pos + 1)); |
| | | } |
| | | } |
| | | } |
| | | node = next; |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * Tells whether the text contains any keyword. |
| | | * |
| | | * @param text the text to scan |
| | | * @return {@code true} as soon as one keyword occurrence is found, {@code false} otherwise |
| | | */ |
| | | public boolean ContainsAny(final String text) { |
| | | TrieNode3 current = null; |
| | | for (int pos = 0, n = text.length(); pos < n; pos++) { |
| | | final char ch = text.charAt(pos); |
| | | final TrieNode3 next; |
| | | if (current != null && current.HasKey(ch)) { |
| | | next = current.GetValue(ch); |
| | | } else { |
| | | // No transition from the current node: try its wildcard branch on the rest of |
| | | // the text, then restart from the first-character table. |
| | | if (current != null && current.HasWildcard |
| | | && ContainsAny(text, pos + 1, current.WildcardNode)) { |
| | | return true; |
| | | } |
| | | next = _first[ch]; |
| | | } |
| | | if (next != null && next.End) { |
| | | final int begin = pos + 1 - _keywordLength[next.Results.get(0)]; |
| | | if (begin >= 0) { |
| | | return true; |
| | | } |
| | | } |
| | | current = next; |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | // Continues a containment scan from position index, starting at node ptr. Gives up (after |
| | | // trying the wildcard branch) as soon as a character has no transition. |
| | | private boolean ContainsAny(final String text, final int index, TrieNode3 ptr) { |
| | | TrieNode3 node = ptr; |
| | | for (int pos = index, n = text.length(); pos < n; pos++) { |
| | | final char ch = text.charAt(pos); |
| | | if (!node.HasKey(ch)) { |
| | | return node.HasWildcard && ContainsAny(text, pos + 1, node.WildcardNode); |
| | | } |
| | | final TrieNode3 next = node.GetValue(ch); |
| | | |
| | | if (next.End) { |
| | | final int begin = pos + 1 - _keywordLength[next.Results.get(0)]; |
| | | if (begin >= 0) { |
| | | return true; |
| | | } |
| | | } |
| | | node = next; |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | /** |
| | | * Masks every keyword occurrence in the text using the default replacement character '*'. |
| | | * |
| | | * @param text the text to scan |
| | | * @return a copy of {@code text} with the characters of every hit replaced |
| | | */ |
| | | public String Replace(final String text) { |
| | | final char defaultMask = '*'; |
| | | return Replace(text, defaultMask); |
| | | } |
| | | |
| | | /** |
| | | * 在文本中替换所有的关键字 |
| | | * |
| | | * @param text 文本 |
| | | * @param replaceChar 替换符 |
| | | * @return |
| | | */ |
| | | public String Replace(final String text, final char replaceChar) { |
| | | final StringBuilder result = new StringBuilder(text); |
| | | |
| | | TrieNode3 ptr = null; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t = text.charAt(i); |
| | | TrieNode3 tn; |
| | | if (ptr == null) { |
| | | tn = _first[t]; |
| | | } else { |
| | | if (ptr.HasKey(t) == false) { |
| | | if (ptr.HasWildcard) { |
| | | Replace(text, i + 1, ptr.WildcardNode, replaceChar, result); |
| | | } |
| | | tn = _first[t]; |
| | | } else { |
| | | tn = ptr.GetValue(t); |
| | | } |
| | | } |
| | | if (tn != null) { |
| | | if (tn.End) { |
| | | final int maxLength = _keywordLength[tn.Results.get(0)]; |
| | | final int start = i + 1 - maxLength; |
| | | if (start >= 0) { |
| | | for (int j = start; j <= i; j++) { |
| | | result.setCharAt(j, replaceChar); |
| | | } |
| | | } |
| | | } |
| | | } |
| | | ptr = tn; |
| | | } |
| | | return result.toString(); |
| | | } |
| | | |
| | | // Continues a masking scan from position index, starting at node ptr, writing into result. |
| | | // Stops (after trying the wildcard branch) as soon as a character has no transition. |
| | | private void Replace(final String text, final int index, TrieNode3 ptr, final char replaceChar, |
| | | final StringBuilder result) { |
| | | TrieNode3 node = ptr; |
| | | for (int pos = index, n = text.length(); pos < n; pos++) { |
| | | final char ch = text.charAt(pos); |
| | | if (!node.HasKey(ch)) { |
| | | if (node.HasWildcard) { |
| | | Replace(text, pos + 1, node.WildcardNode, replaceChar, result); |
| | | } |
| | | return; |
| | | } |
| | | final TrieNode3 next = node.GetValue(ch); |
| | | if (next.End) { |
| | | final int begin = pos + 1 - _keywordLength[next.Results.get(0)]; |
| | | if (begin >= 0) { |
| | | for (int k = begin; k <= pos; k++) { |
| | | result.setCharAt(k, replaceChar); |
| | | } |
| | | } |
| | | } |
| | | node = next; |
| | | } |
| | | } |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springblade.modules.words.internals.BaseMatchEx; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | |
| | | public class StringMatchEx extends BaseMatchEx { |
| | | |
| | | /** |
| | | * Finds the first keyword occurrence in the text. |
| | | * |
| | | * @param text the text to scan |
| | | * @return the matched substring of {@code text}, or {@code null} when the scan ends without a hit |
| | | */ |
| | | public String FindFirst(final String text) { |
| | | // p is the current state; 0 is the start state. |
| | | int p = 0; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | // Translate the character into the matcher's internal code. |
| | | final int t = _dict[t1]; |
| | | |
| | | // Code 0 resets the scan to the start state (presumably: the character occurs in no keyword). |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next; |
| | | // From the start state, or when t is outside the [_min[p], _max[p]] range of state p, |
| | | // restart from the first-character table. |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index = _nextIndex[p].IndexOf(t); |
| | | // -1: state p has no transition for t. |
| | | if (index == -1) { |
| | | // Try the wildcard state of p on the rest of the text before restarting. |
| | | if (_wildcard[p] > 0) { |
| | | final String r = FindFirst(text, i + 1, _wildcard[p]); |
| | | if (r != null) { |
| | | return r; |
| | | } |
| | | } |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index); |
| | | } |
| | | } |
| | | if (next != 0) { |
| | | // _end[next] .. _end[next + 1] delimits the entries of _resultIndex for state next; |
| | | // a non-empty range is treated as a hit, and its first entry gives the keyword length. |
| | | final int start = _end[next]; |
| | | if (start < _end[next + 1]) { |
| | | final int length = _keywordLength[_resultIndex[start]]; |
| | | final int s = i - length + 1; |
| | | // Only report the hit when the computed start lies inside the text. |
| | | if (s >= 0) { |
| | | return text.substring(s, i + 1); |
| | | } |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | return null; |
| | | } |
| | | |
| | | // Continues a first-match scan from position index in state p (called with a wildcard state). |
| | | // Unlike the public overload it returns null instead of restarting when a character has |
| | | // code 0 or no transition exists. |
| | | private String FindFirst(final String text, final int index, int p) { |
| | | for (int i = index; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | final int t = _dict[t1]; |
| | | // Code 0 ends this branch. |
| | | if (t == 0) { |
| | | return null; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index2 = _nextIndex[p].IndexOf(t); |
| | | // -1: state p has no transition for t; only the wildcard state of p can continue. |
| | | if (index2 == -1) { |
| | | if (_wildcard[p] > 0) { |
| | | final String r = FindFirst(text, i + 1, _wildcard[p]); |
| | | if (r != null) { |
| | | return r; |
| | | } |
| | | } |
| | | return null; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index2); |
| | | } |
| | | } |
| | | // A non-empty [_end[next], _end[next + 1]) range is treated as a hit. |
| | | final int start = _end[next]; |
| | | if (start < _end[next + 1]) { |
| | | final int length = _keywordLength[_resultIndex[start]]; |
| | | final int s = i - length + 1; |
| | | if (s >= 0) { |
| | | return text.substring(s, i + 1); |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | return null; |
| | | } |
| | | |
| | | /** |
| | | * Finds all keyword occurrences in the text. |
| | | * |
| | | * @param text the text to scan |
| | | * @return the matched substrings of {@code text} in the order they are found; never {@code null} |
| | | */ |
| | | public List<String> FindAll(final String text) { |
| | | final List<String> result = new ArrayList<String>(); |
| | | // p is the current state; 0 is the start state. |
| | | int p = 0; |
| | | |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | |
| | | // Translate the character into the matcher's internal code; code 0 resets the scan. |
| | | final int t = _dict[t1]; |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next; |
| | | // From the start state, or when t is outside the [_min[p], _max[p]] range of state p, |
| | | // restart from the first-character table. |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index2 = _nextIndex[p].IndexOf(t); |
| | | // -1: state p has no transition for t. |
| | | if (index2 == -1) { |
| | | // Collect hits along the wildcard state of p, then restart. |
| | | if (_wildcard[p] > 0) { |
| | | FindAll(text, i + 1, _wildcard[p], result); |
| | | } |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index2); |
| | | } |
| | | } |
| | | |
| | | if (next != 0) { |
| | | // Every entry of _resultIndex in [_end[next], _end[next + 1]) names a keyword ending here. |
| | | for (int j = _end[next]; j < _end[next + 1]; j++) { |
| | | final int length = _keywordLength[_resultIndex[j]]; |
| | | final int s = i - length + 1; |
| | | // Only report the hit when the computed start lies inside the text. |
| | | if (s >= 0) { |
| | | final String key = text.substring(s, i + 1); |
| | | result.add(key); |
| | | } |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | return result; |
| | | } |
| | | |
| | | // Continues an all-matches scan from position index in state p (called with a wildcard state), |
| | | // appending hits to result. Returns instead of restarting when a character has code 0 or no |
| | | // transition exists. |
| | | private void FindAll(final String text, final int index, int p, final List<String> result) { |
| | | for (int i = index; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | final int t = _dict[t1]; |
| | | // Code 0 ends this branch. |
| | | if (t == 0) { |
| | | return; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index2 = _nextIndex[p].IndexOf(t); |
| | | // -1: state p has no transition for t; only the wildcard state of p can continue. |
| | | if (index2 == -1) { |
| | | if (_wildcard[p] > 0) { |
| | | FindAll(text, i + 1, _wildcard[p], result); |
| | | } |
| | | return; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index2); |
| | | } |
| | | } |
| | | |
| | | // Every entry of _resultIndex in [_end[next], _end[next + 1]) names a keyword ending here. |
| | | for (int j = _end[next]; j < _end[next + 1]; j++) { |
| | | final int length = _keywordLength[_resultIndex[j]]; |
| | | final int s = i - length + 1; |
| | | if (s >= 0) { |
| | | final String key = text.substring(s, i + 1); |
| | | result.add(key); |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * Tells whether the text contains any keyword. |
| | | * |
| | | * @param text the text to scan |
| | | * @return {@code true} as soon as one keyword occurrence is found, {@code false} otherwise |
| | | */ |
| | | public boolean ContainsAny(final String text) { |
| | | // p is the current state; 0 is the start state. |
| | | int p = 0; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | // Translate the character into the matcher's internal code. |
| | | final int t = _dict[t1]; |
| | | |
| | | // Code 0 resets the scan to the start state (presumably: the character occurs in no keyword). |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next; |
| | | // From the start state, or when t is outside the [_min[p], _max[p]] range of state p, |
| | | // restart from the first-character table. |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index = _nextIndex[p].IndexOf(t); |
| | | // -1: state p has no transition for t. |
| | | if (index == -1) { |
| | | // Try the wildcard state of p on the rest of the text before restarting. |
| | | if (_wildcard[p] > 0) { |
| | | final boolean r = ContainsAny(text, i + 1, _wildcard[p]); |
| | | if (r) { |
| | | return true; |
| | | } |
| | | } |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index); |
| | | } |
| | | } |
| | | |
| | | if (next != 0) { |
| | | // A non-empty [_end[next], _end[next + 1]) range is treated as a hit. |
| | | // NOTE(review): unlike FindFirst/FindAll/Replace and the private overload, no |
| | | // "start >= 0" length check is made here - confirm this difference is intended. |
| | | if (_end[next] < _end[next + 1]) { |
| | | return true; |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | // Continues a containment scan from position index in state p (called with a wildcard state). |
| | | // Returns false instead of restarting when a character has code 0 or no transition exists. |
| | | private boolean ContainsAny(final String text, final int index, int p) { |
| | | for (int i = index; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | |
| | | final int t = _dict[t1]; |
| | | // Code 0 ends this branch. |
| | | if (t == 0) { |
| | | return false; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index2 = _nextIndex[p].IndexOf(t); |
| | | // -1: state p has no transition for t; only the wildcard state of p can continue. |
| | | if (index2 == -1) { |
| | | if (_wildcard[p] > 0) { |
| | | final boolean r = ContainsAny(text, i + 1, _wildcard[p]); |
| | | if (r) { |
| | | return true; |
| | | } |
| | | } |
| | | return false; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index2); |
| | | } |
| | | } |
| | | |
| | | // A non-empty [_end[next], _end[next + 1]) range is treated as a hit, provided the |
| | | // computed start of the keyword lies inside the text. |
| | | final int start = _end[next]; |
| | | if (start < _end[next + 1]) { |
| | | final int length = _keywordLength[_resultIndex[start]]; |
| | | final int s = i - length + 1; |
| | | if (s >= 0) { |
| | | return true; |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | /** |
| | | * Masks every keyword occurrence in the text. |
| | | * |
| | | * @param text the text to scan |
| | | * @param replaceChar the character written over each character of a hit |
| | | * @return a copy of {@code text} with the characters of every hit replaced |
| | | */ |
| | | public String Replace(final String text, final char replaceChar) { |
| | | // Work on a copy so that matching always runs against the unmodified text. |
| | | final StringBuilder result = new StringBuilder(text); |
| | | |
| | | // p is the current state; 0 is the start state. |
| | | int p = 0; |
| | | |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | // Translate the character into the matcher's internal code. |
| | | final int t = _dict[t1]; |
| | | |
| | | // Code 0 resets the scan to the start state (presumably: the character occurs in no keyword). |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next; |
| | | // From the start state, or when t is outside the [_min[p], _max[p]] range of state p, |
| | | // restart from the first-character table. |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index2 = _nextIndex[p].IndexOf(t); |
| | | // -1: state p has no transition for t. |
| | | if (index2 == -1) { |
| | | // Mask hits along the wildcard state of p, then restart. |
| | | if (_wildcard[p] > 0) { |
| | | Replace(text, i + 1, _wildcard[p], replaceChar, result); |
| | | } |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index2); |
| | | } |
| | | } |
| | | |
| | | if (next != 0) { |
| | | // A non-empty [_end[next], _end[next + 1]) range is treated as a hit; the length of |
| | | // its first entry decides how many characters ending at i are masked. |
| | | final int start = _end[next]; |
| | | if (start < _end[next + 1]) { |
| | | final int maxLength = _keywordLength[_resultIndex[start]]; |
| | | final int start2 = i + 1 - maxLength; |
| | | if (start2 >= 0) { |
| | | for (int j = start2; j <= i; j++) { |
| | | result.setCharAt(j, replaceChar); |
| | | } |
| | | } |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | return result.toString(); |
| | | } |
| | | |
| | | // Continues a masking scan from position index in state p (called with a wildcard state), |
| | | // writing into result. Returns instead of restarting when a character has code 0 or no |
| | | // transition exists. |
| | | private void Replace(final String text, final int index, int p, final char replaceChar, |
| | | final StringBuilder result) { |
| | | for (int i = index; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | |
| | | final int t = _dict[t1]; |
| | | // Code 0 ends this branch. |
| | | if (t == 0) { |
| | | return; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index2 = _nextIndex[p].IndexOf(t); |
| | | // -1: state p has no transition for t; only the wildcard state of p can continue. |
| | | if (index2 == -1) { |
| | | if (_wildcard[p] > 0) { |
| | | Replace(text, i + 1, _wildcard[p], replaceChar, result); |
| | | } |
| | | return; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index2); |
| | | } |
| | | } |
| | | |
| | | // A non-empty [_end[next], _end[next + 1]) range is treated as a hit; the length of its |
| | | // first entry decides how many characters ending at i are masked. |
| | | final int start = _end[next]; |
| | | if (start < _end[next + 1]) { |
| | | final int maxLength = _keywordLength[_resultIndex[start]]; |
| | | final int start2 = i + 1 - maxLength; |
| | | if (start2 >= 0) { |
| | | for (int j = start2; j <= i; j++) { |
| | | result.setCharAt(j, replaceChar); |
| | | } |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springblade.modules.words.internals.BaseSearch; |
| | | import org.springblade.modules.words.internals.TrieNode2; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | |
| | | public class StringSearch extends BaseSearch { |
| | | /** |
| | |  * Scans the text from left to right and returns the first keyword that is hit. |
| | |  * |
| | |  * @param text text to scan |
| | |  * @return the first keyword recorded on the earliest node reached whose {@code End} flag is set, |
| | |  *         or {@code null} when no such node is reached |
| | |  */ |
| | | public String FindFirst(final String text) { |
| | |     TrieNode2 state = null; |
| | |     final int length = text.length(); |
| | |     for (int pos = 0; pos < length; pos++) { |
| | |         final char ch = text.charAt(pos); |
| | |         // Follow the current node's edge for ch when it has one; otherwise restart from the first-level table. |
| | |         final TrieNode2 node = (state != null && state.HasKey(ch)) ? state.GetValue(ch) : _first[ch]; |
| | |         if (node != null && node.End) { |
| | |             return _keywords[node.Results.get(0)]; |
| | |         } |
| | |         state = node; |
| | |     } |
| | |     return null; |
| | | } |
| | | |
| | | /** |
| | | * 在文本中查找所有的关键字 |
| | | * |
| | | * @param text 文本 |
| | | * @return |
| | | */ |
| | | public List<String> FindAll(final String text) { |
| | | TrieNode2 ptr = null; |
| | | final List<String> list = new ArrayList<String>(); |
| | | |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t = text.charAt(i); |
| | | TrieNode2 tn = null; |
| | | if (ptr == null) { |
| | | tn = _first[t]; |
| | | } else { |
| | | if (ptr.HasKey(t) == false) { |
| | | tn = _first[t]; |
| | | } else { |
| | | tn = ptr.GetValue(t); |
| | | } |
| | | } |
| | | if (tn != null) { |
| | | if (tn.End) { |
| | | tn.Results.forEach(item -> { |
| | | list.add(_keywords[item]); |
| | | }); |
| | | } |
| | | } |
| | | ptr = tn; |
| | | } |
| | | return list; |
| | | } |
| | | |
| | | /** |
| | |  * Tells whether the text contains at least one keyword. |
| | |  * |
| | |  * @param text text to scan |
| | |  * @return {@code true} as soon as a node whose {@code End} flag is set is reached, otherwise {@code false} |
| | |  */ |
| | | public boolean ContainsAny(final String text) { |
| | |     TrieNode2 state = null; |
| | |     final int length = text.length(); |
| | |     for (int pos = 0; pos < length; pos++) { |
| | |         final char ch = text.charAt(pos); |
| | |         // Follow the current node's edge for ch when it has one; otherwise restart from the first-level table. |
| | |         final TrieNode2 node = (state != null && state.HasKey(ch)) ? state.GetValue(ch) : _first[ch]; |
| | |         if (node != null && node.End) { |
| | |             return true; |
| | |         } |
| | |         state = node; |
| | |     } |
| | |     return false; |
| | | } |
| | | |
| | | /** |
| | | * Replaces every keyword match in the text, using {@code '*'} as the replacement character. |
| | | * Delegates to {@link #Replace(String, char)}. |
| | | * |
| | | * @param text text to scan |
| | | * @return the result of {@code Replace(text, '*')} |
| | | */ |
| | | public String Replace(final String text) { |
| | | return Replace(text, '*'); |
| | | } |
| | | |
| | | /** |
| | | * 在文本中替换所有的关键字 |
| | | * |
| | | * @param text 文本 |
| | | * @param replaceChar 替换符 |
| | | * @return |
| | | */ |
| | | public String Replace(final String text, final char replaceChar) { |
| | | final StringBuilder result = new StringBuilder(text); |
| | | |
| | | TrieNode2 ptr = null; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t = text.charAt(i); |
| | | TrieNode2 tn = null; |
| | | if (ptr == null) { |
| | | tn = _first[t]; |
| | | } else { |
| | | if (ptr.HasKey(t) == false) { |
| | | tn = _first[t]; |
| | | } else { |
| | | tn = ptr.GetValue(t); |
| | | } |
| | | } |
| | | if (tn != null) { |
| | | if (tn.End) { |
| | | final int maxLength = _keywords[tn.Results.get(0)].length(); |
| | | final int start = i + 1 - maxLength; |
| | | for (int j = start; j <= i; j++) { |
| | | result.setCharAt(j, replaceChar); |
| | | } |
| | | } |
| | | } |
| | | ptr = tn; |
| | | } |
| | | return result.toString(); |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springblade.modules.words.internals.BaseSearchEx; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | |
| | | public class StringSearchEx extends BaseSearchEx { |
| | | /** |
| | | * 在文本中查找所有的关键字 |
| | | * |
| | | * @param text 文本 |
| | | * @return |
| | | */ |
| | | public List<String> FindAll(final String text) { |
| | | final List<String> result = new ArrayList<String>(); |
| | | |
| | | int p = 0; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | final Integer t = _dict[t1]; |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _first[t]; |
| | | } else { |
| | | final int index = _nextIndex[p].IndexOf(t); |
| | | if (index == -1) { |
| | | next = _first[t]; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index); |
| | | } |
| | | } |
| | | if (next != 0) { |
| | | for (int j = _end[next]; j < _end[next + 1]; j++) { |
| | | result.add(_keywords[_resultIndex[j]]); |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | return result; |
| | | } |
| | | |
| | | /** |
| | |  * Scans the text from left to right and returns the first keyword that is hit. |
| | |  * |
| | |  * @param text text to scan |
| | |  * @return the first keyword recorded for the earliest state that has results, or {@code null} when none is reached |
| | |  */ |
| | | public String FindFirst(final String text) { |
| | |     int state = 0; |
| | |     final int length = text.length(); |
| | |     for (int pos = 0; pos < length; pos++) { |
| | |         final int code = _dict[text.charAt(pos)]; |
| | |         if (code == 0) { |
| | |             state = 0; |
| | |             continue; |
| | |         } |
| | |         // Only consult the state's transition table when the code lies inside its [min, max] range. |
| | |         final boolean inRange = state != 0 && code >= _min[state] && code <= _max[state]; |
| | |         final int slot = inRange ? _nextIndex[state].IndexOf(code) : -1; |
| | |         final int next = (slot == -1) ? _first[code] : _nextIndex[state].GetValue(slot); |
| | | |
| | |         if (next != 0 && _end[next] < _end[next + 1]) { |
| | |             return _keywords[_resultIndex[_end[next]]]; |
| | |         } |
| | |         state = next; |
| | |     } |
| | |     return null; |
| | | } |
| | | |
| | | /** |
| | |  * Tells whether the text contains at least one keyword. |
| | |  * |
| | |  * @param text text to scan |
| | |  * @return {@code true} as soon as a state that has results is reached, otherwise {@code false} |
| | |  */ |
| | | public boolean ContainsAny(final String text) { |
| | |     int state = 0; |
| | |     final int length = text.length(); |
| | |     for (int pos = 0; pos < length; pos++) { |
| | |         final int code = _dict[text.charAt(pos)]; |
| | |         if (code == 0) { |
| | |             state = 0; |
| | |             continue; |
| | |         } |
| | |         // Only consult the state's transition table when the code lies inside its [min, max] range. |
| | |         final boolean inRange = state != 0 && code >= _min[state] && code <= _max[state]; |
| | |         final int slot = inRange ? _nextIndex[state].IndexOf(code) : -1; |
| | |         final int next = (slot == -1) ? _first[code] : _nextIndex[state].GetValue(slot); |
| | | |
| | |         if (next != 0 && _end[next] < _end[next + 1]) { |
| | |             return true; |
| | |         } |
| | |         state = next; |
| | |     } |
| | |     return false; |
| | | } |
| | | |
| | | /** |
| | | * Replaces every keyword match in the text, using {@code '*'} as the replacement character. |
| | | * Delegates to {@link #Replace(String, char)}. |
| | | * |
| | | * @param text text to scan |
| | | * @return the result of {@code Replace(text, '*')} |
| | | */ |
| | | public String Replace(final String text) { |
| | | return Replace(text, '*'); |
| | | } |
| | | |
| | | /** |
| | |  * Replaces every keyword match in the text with the given character. |
| | |  * <p> |
| | |  * For each state reached that has results, the span overwritten ends at the current position |
| | |  * and is as long as the first keyword recorded for that state. |
| | |  * |
| | |  * @param text        text to scan; it is copied and not modified itself |
| | |  * @param replaceChar character written over each matched span |
| | |  * @return the copy of the text after all matched spans have been overwritten |
| | |  */ |
| | | public String Replace(final String text, final char replaceChar) { |
| | |     final StringBuilder masked = new StringBuilder(text); |
| | |     int state = 0; |
| | |     final int length = text.length(); |
| | |     for (int pos = 0; pos < length; pos++) { |
| | |         final int code = _dict[text.charAt(pos)]; |
| | |         if (code == 0) { |
| | |             state = 0; |
| | |             continue; |
| | |         } |
| | |         // Only consult the state's transition table when the code lies inside its [min, max] range. |
| | |         final boolean inRange = state != 0 && code >= _min[state] && code <= _max[state]; |
| | |         final int slot = inRange ? _nextIndex[state].IndexOf(code) : -1; |
| | |         final int next = (slot == -1) ? _first[code] : _nextIndex[state].GetValue(slot); |
| | | |
| | |         if (next != 0) { |
| | |             final int firstResult = _end[next]; |
| | |             if (firstResult < _end[next + 1]) { |
| | |                 final int keywordLength = _keywords[_resultIndex[firstResult]].length(); |
| | |                 for (int k = pos + 1 - keywordLength; k <= pos; k++) { |
| | |                     masked.setCharAt(k, replaceChar); |
| | |                 } |
| | |             } |
| | |         } |
| | |         state = next; |
| | |     } |
| | |     return masked.toString(); |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springblade.modules.words.internals.BaseSearchEx2; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | |
| | | |
| | | public class StringSearchEx2 extends BaseSearchEx2 { |
| | | |
| | | /** |
| | |  * Collects every keyword hit while scanning the text from left to right. |
| | |  * |
| | |  * @param text text to scan |
| | |  * @return the keywords in the order they were hit; empty when nothing matches |
| | |  */ |
| | | public List<String> FindAll(final String text) { |
| | |     final List<String> hits = new ArrayList<String>(); |
| | |     int state = 0; |
| | | |
| | |     final int length = text.length(); |
| | |     for (int pos = 0; pos < length; pos++) { |
| | |         final int code = _dict[text.charAt(pos)]; |
| | |         if (code == 0) { |
| | |             state = 0; |
| | |             continue; |
| | |         } |
| | |         int slot = _next[state] + code; |
| | |         // No transition from a non-root state: fall back to the root and retry once. |
| | |         if (_key[slot] != code && state != 0) { |
| | |             state = 0; |
| | |             slot = _next[0] + code; |
| | |         } |
| | |         if (_key[slot] != code) { |
| | |             continue; |
| | |         } |
| | |         final int guide = _check[slot]; |
| | |         if (guide > 0) { |
| | |             for (final int keywordId : _guides[guide]) { |
| | |                 hits.add(_keywords[keywordId]); |
| | |             } |
| | |         } |
| | |         state = slot; |
| | |     } |
| | |     return hits; |
| | | } |
| | | |
| | | /** |
| | |  * Scans the text from left to right and returns the first keyword that is hit. |
| | |  * |
| | |  * @param text text to scan |
| | |  * @return the first keyword listed for the earliest slot whose {@code _check} entry is positive, |
| | |  *         or {@code null} when none is reached |
| | |  */ |
| | | public String FindFirst(final String text) { |
| | |     int state = 0; |
| | |     final int length = text.length(); |
| | |     for (int pos = 0; pos < length; pos++) { |
| | |         final int code = _dict[text.charAt(pos)]; |
| | |         if (code == 0) { |
| | |             state = 0; |
| | |             continue; |
| | |         } |
| | |         int slot = _next[state] + code; |
| | |         if (_key[slot] != code) { |
| | |             // No transition from the current state: fall back to the root and retry once. |
| | |             state = 0; |
| | |             slot = _next[0] + code; |
| | |             if (_key[slot] != code) { |
| | |                 continue; |
| | |             } |
| | |         } |
| | |         final int guide = _check[slot]; |
| | |         if (guide > 0) { |
| | |             return _keywords[_guides[guide][0]]; |
| | |         } |
| | |         state = slot; |
| | |     } |
| | |     return null; |
| | | } |
| | | |
| | | /** |
| | | * 判断文本是否包含关键字 |
| | | * |
| | | * @param text 文本 |
| | | */ |
| | | public boolean ContainsAny(final String text) { |
| | | int p = 0; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final int t = _dict[text.charAt(i)]; |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next = _next[p] + t; |
| | | if (_key[next] == t) { |
| | | if (_check[next] > 0) { |
| | | return true; |
| | | } |
| | | p = next; |
| | | } else { |
| | | p = 0; |
| | | next = _next[p] + t; |
| | | if (_key[next] == t) { |
| | | if (_check[next] > 0) { |
| | | return true; |
| | | } |
| | | p = next; |
| | | } |
| | | } |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | /** |
| | | * Replaces every keyword match in the text, using {@code '*'} as the replacement character. |
| | | * Delegates to {@link #Replace(String, char)}. |
| | | * |
| | | * @param text text to scan |
| | | * @return the result of {@code Replace(text, '*')} |
| | | */ |
| | | public String Replace(final String text) { |
| | | return Replace(text, '*'); |
| | | } |
| | | |
| | | /** |
| | | * 在文本中替换所有的关键字 |
| | | * |
| | | * @param text 文本 |
| | | * @param replaceChar 替换符 |
| | | * @return |
| | | */ |
| | | public String Replace(final String text, final char replaceChar) { |
| | | final StringBuilder result = new StringBuilder(text); |
| | | |
| | | int p = 0; |
| | | |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final int t = _dict[text.charAt(i)]; |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next = _next[p] + t; |
| | | boolean find = _key[next] == t; |
| | | if (find == false && p != 0) { |
| | | p = 0; |
| | | next = _next[p] + t; |
| | | find = _key[next] == t; |
| | | } |
| | | if (find) { |
| | | final int index = _check[next]; |
| | | if (index > 0) { |
| | | final int maxLength = _keywords[_guides[index][0]].length(); |
| | | final int start = i + 1 - maxLength; |
| | | for (int j = start; j <= i; j++) { |
| | | result.setCharAt(j, replaceChar); |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | } |
| | | return result.toString(); |
| | | } |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springblade.modules.words.internals.PinyinDict; |
| | | import org.springblade.modules.words.internals.Translate; |
| | | |
| | | import java.io.IOException; |
| | | import java.util.List; |
| | | import java.util.regex.Pattern; |
| | | |
| | | public class WordsHelper { |
| | | |
| | | /** |
| | | * Returns the pinyin initials of the text by delegating to {@code PinyinDict.GetFirstPinyin(text, 0)}. |
| | | * The original note gives the Chinese range as [0x3400, 0x9FD5] and warns that many rare characters are unverified. |
| | | * |
| | | * @param text source text |
| | | * @return the value returned by {@code PinyinDict.GetFirstPinyin} |
| | | * @throws IOException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | * @throws NumberFormatException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | */ |
| | | public static String GetFirstPinyin(String text) throws NumberFormatException, IOException { |
| | | return PinyinDict.GetFirstPinyin(text, 0); |
| | | } |
| | | |
| | | /** |
| | |  * Converts the text to full pinyin one character at a time, without polyphone handling. |
| | |  * Use {@code GetPinyin} when polyphones matter. The original note gives the Chinese range as |
| | |  * [0x3400, 0x9FD5] and warns that many rare characters are unverified. |
| | |  * |
| | |  * @param text source text |
| | |  * @param tone whether tone marks are requested; passed to {@code PinyinDict} as 1 or 0 |
| | |  * @return the concatenation of the per-character results |
| | |  * @throws IOException declared in the signature; presumably propagated from {@code PinyinDict} |
| | |  * @throws NumberFormatException declared in the signature; presumably propagated from {@code PinyinDict} |
| | |  */ |
| | | public static String GetPinyinFast(String text, Boolean tone) throws NumberFormatException, IOException { |
| | |     final StringBuilder pinyin = new StringBuilder(); |
| | |     final int length = text.length(); |
| | |     for (int pos = 0; pos < length; pos++) { |
| | |         final Character ch = text.charAt(pos); |
| | |         final int toneMode = tone ? 1 : 0; |
| | |         pinyin.append(PinyinDict.GetPinyinFast(ch, toneMode)); |
| | |     } |
| | |     return pinyin.toString(); |
| | | } |
| | | |
| | | /** |
| | |  * Converts the text to full pinyin one character at a time, without polyphone handling, |
| | |  * always passing 0 as the tone argument. Use {@code GetPinyin} when polyphones matter. |
| | |  * The original note gives the Chinese range as [0x3400, 0x9FD5] and warns that many rare characters are unverified. |
| | |  * |
| | |  * @param text source text |
| | |  * @return the concatenation of the per-character results |
| | |  * @throws NumberFormatException declared in the signature; presumably propagated from {@code PinyinDict} |
| | |  * @throws IOException declared in the signature; presumably propagated from {@code PinyinDict} |
| | |  */ |
| | | public static String GetPinyinFast(String text) throws NumberFormatException, IOException { |
| | |     final StringBuilder pinyin = new StringBuilder(); |
| | |     final int length = text.length(); |
| | |     for (int pos = 0; pos < length; pos++) { |
| | |         final Character ch = text.charAt(pos); |
| | |         pinyin.append(PinyinDict.GetPinyinFast(ch, 0)); |
| | |     } |
| | |     return pinyin.toString(); |
| | | } |
| | | |
| | | /** |
| | | * Returns the full pinyin of the text by delegating to {@code PinyinDict.GetPinyin}. |
| | | * The original note says polyphones are supported and gives the Chinese range as [0x4E00, 0x9FD5]. |
| | | * |
| | | * @param text source text |
| | | * @param tone whether tone marks are requested; passed to the delegate as 1 or 0. A {@code null} value fails on unboxing. |
| | | * @return the value returned by {@code PinyinDict.GetPinyin} |
| | | * @throws NumberFormatException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | * @throws IOException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | */ |
| | | public static String GetPinyin(String text, Boolean tone) throws NumberFormatException, IOException { |
| | | return PinyinDict.GetPinyin(text, tone ? 1 : 0); |
| | | } |
| | | |
| | | /** |
| | | * Returns the full pinyin of the text by delegating to {@code PinyinDict.GetPinyin(text, 0)}. |
| | | * The original note says polyphones are supported and gives the Chinese range as [0x4E00, 0x9FD5]. |
| | | * |
| | | * @param text source text |
| | | * @return the value returned by {@code PinyinDict.GetPinyin} |
| | | * @throws NumberFormatException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | * @throws IOException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | */ |
| | | public static String GetPinyin(String text) throws NumberFormatException, IOException { |
| | | return PinyinDict.GetPinyin(text, 0); |
| | | } |
| | | |
| | | /** |
| | | * Returns all pinyin readings of one character by delegating to {@code PinyinDict.GetAllPinyin}. |
| | | * The original note gives the Chinese range as [0x3400, 0x9FD5] and warns that many rare characters are unverified. |
| | | * |
| | | * @param c character to look up |
| | | * @param tone whether tone marks are requested; passed to the delegate as 1 or 0. A {@code null} value fails on unboxing. |
| | | * @return the list returned by {@code PinyinDict.GetAllPinyin} |
| | | * @throws NumberFormatException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | * @throws IOException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | */ |
| | | public static List<String> GetAllPinyin(char c, Boolean tone) throws NumberFormatException, IOException { |
| | | return PinyinDict.GetAllPinyin(c, tone ? 1 : 0); |
| | | } |
| | | |
| | | /** |
| | | * Returns all pinyin readings of one character by delegating to {@code PinyinDict.GetAllPinyin(c, 0)}. |
| | | * The original note gives the Chinese range as [0x3400, 0x9FD5] and warns that many rare characters are unverified. |
| | | * |
| | | * @param c character to look up |
| | | * @return the list returned by {@code PinyinDict.GetAllPinyin} |
| | | * @throws NumberFormatException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | * @throws IOException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | */ |
| | | public static List<String> GetAllPinyin(char c) throws NumberFormatException, IOException { |
| | | return PinyinDict.GetAllPinyin(c, 0); |
| | | } |
| | | |
| | | /** |
| | | * Returns the pinyin of a personal name as one string: the list from {@code PinyinDict.GetPinyinForName} joined with no separator. |
| | | * The original note gives the Chinese range as [0x3400, 0x9FD5] and warns that many rare characters are unverified. |
| | | * |
| | | * @param name personal name |
| | | * @param tone whether tone marks are requested; passed to the delegate as 1 or 0. A {@code null} value fails on unboxing. |
| | | * @return the joined pinyin parts |
| | | * @throws NumberFormatException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | * @throws IOException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | */ |
| | | public static String GetPinyinForName(String name, Boolean tone) throws NumberFormatException, IOException { |
| | | return String.join("", PinyinDict.GetPinyinForName(name, tone ? 1 : 0)); |
| | | } |
| | | |
| | | /** |
| | | * Returns the pinyin of a personal name as one string: the list from {@code PinyinDict.GetPinyinForName(name, 0)} joined with no separator. |
| | | * The original note gives the Chinese range as [0x3400, 0x9FD5] and warns that many rare characters are unverified. |
| | | * |
| | | * @param name personal name |
| | | * @return the joined pinyin parts |
| | | * @throws NumberFormatException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | * @throws IOException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | */ |
| | | public static String GetPinyinForName(String name) throws NumberFormatException, IOException { |
| | | return String.join("", PinyinDict.GetPinyinForName(name, 0)); |
| | | } |
| | | |
| | | /** |
| | | * Returns the pinyin of a personal name as a list by delegating to {@code PinyinDict.GetPinyinForName}. |
| | | * The original note gives the Chinese range as [0x3400, 0x9FD5] and warns that many rare characters are unverified. |
| | | * |
| | | * @param name personal name |
| | | * @param tone whether tone marks are requested; passed to the delegate as 1 or 0. A {@code null} value fails on unboxing. |
| | | * @return the list returned by {@code PinyinDict.GetPinyinForName} |
| | | * @throws NumberFormatException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | * @throws IOException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | */ |
| | | public static List<String> GetPinyinListForName(String name, Boolean tone) |
| | | throws NumberFormatException, IOException { |
| | | return PinyinDict.GetPinyinForName(name, tone ? 1 : 0); |
| | | } |
| | | |
| | | /** |
| | | * Returns the pinyin of a personal name as a list by delegating to {@code PinyinDict.GetPinyinForName(name, 0)}. |
| | | * The original note gives the Chinese range as [0x3400, 0x9FD5] and warns that many rare characters are unverified. |
| | | * |
| | | * @param name personal name |
| | | * @return the list returned by {@code PinyinDict.GetPinyinForName} |
| | | * @throws NumberFormatException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | * @throws IOException declared in the signature; presumably propagated from {@code PinyinDict} |
| | | */ |
| | | public static List<String> GetPinyinListForName(String name) throws NumberFormatException, IOException { |
| | | return PinyinDict.GetPinyinForName(name, 0); |
| | | } |
| | | |
| | | |
| | | /** |
| | |  * Tells whether the input contains at least one Chinese character, |
| | |  * i.e. a character in U+3400..U+4DB5 or U+4E00..U+9FD5. |
| | |  * <p> |
| | |  * Uses {@code find()} rather than {@code Pattern.matches}: the latter requires the whole input to match |
| | |  * the one-character class, so it was only ever true for a single-character string. |
| | |  * |
| | |  * @param content text to inspect |
| | |  * @return {@code true} if any character of {@code content} falls in the ranges above |
| | |  */ |
| | | public static boolean HasChinese(String content) { |
| | |     return Pattern.compile("[\\u3400-\\u4db5\\u4e00-\\u9fd5]").matcher(content).find(); |
| | | } |
| | | |
| | | /** |
| | |  * Tells whether the input consists only of Chinese characters, |
| | |  * i.e. characters in U+3400..U+4DB5 or U+4E00..U+9FD5. An empty string also passes. |
| | |  * |
| | |  * @param content text to inspect |
| | |  * @return {@code true} if every character of {@code content} falls in the ranges above |
| | |  */ |
| | | public static boolean IsAllChinese(String content) { |
| | |     final Pattern allChinese = Pattern.compile("^[\\u3400-\\u4db5\\u4e00-\\u9fd5]*$"); |
| | |     return allChinese.matcher(content).matches(); |
| | | } |
| | | |
| | | /** |
| | |  * Tells whether the input contains at least one ASCII letter (A-Z or a-z). |
| | |  * <p> |
| | |  * Uses {@code find()} rather than {@code Pattern.matches}: the latter requires the whole input to match |
| | |  * the one-character class, so it was only ever true for a single-letter string. |
| | |  * |
| | |  * @param content text to inspect |
| | |  * @return {@code true} if any character of {@code content} is an ASCII letter |
| | |  */ |
| | | public static boolean HasEnglish(String content) { |
| | |     return Pattern.compile("[A-Za-z]").matcher(content).find(); |
| | | } |
| | | |
| | | /** |
| | |  * Tells whether the input consists only of ASCII letters (A-Z or a-z). An empty string also passes. |
| | |  * |
| | |  * @param content text to inspect |
| | |  * @return {@code true} if every character of {@code content} is an ASCII letter |
| | |  */ |
| | | public static boolean IsAllEnglish(String content) { |
| | |     final Pattern allLetters = Pattern.compile("^[A-Za-z]*$"); |
| | |     return allLetters.matcher(content).matches(); |
| | | } |
| | | |
| | | /** |
| | |  * Converts half-width characters to their full-width forms. |
| | |  * <p> |
| | |  * The space (U+0020) becomes the ideographic space (U+3000); the printable ASCII characters |
| | |  * U+0021..U+007E are shifted by 0xFEE0 into U+FF01..U+FF5E. Every other character is kept as is. |
| | |  * Control characters below U+0020 (tab, line feed, ...) are kept too: they have no full-width |
| | |  * form, and shifting them would produce characters outside the full-width range. |
| | |  * |
| | |  * @param input text to convert |
| | |  * @return the converted text, same length as {@code input} |
| | |  */ |
| | | public static String ToSBC(String input) { |
| | |     StringBuilder sb = new StringBuilder(input); |
| | |     for (int i = 0; i < input.length(); i++) { |
| | |         char c = input.charAt(i); |
| | |         if (c == 32) { |
| | |             sb.setCharAt(i, (char) 12288); |
| | |         } else if (c > 32 && c < 127) { |
| | |             sb.setCharAt(i, (char) (c + 65248)); |
| | |         } |
| | |     } |
| | |     return sb.toString(); |
| | | } |
| | | |
| | | /** |
| | |  * Converts full-width characters to their half-width forms. |
| | |  * <p> |
| | |  * The ideographic space (U+3000) becomes a plain space; characters in U+FF01..U+FF5E are shifted |
| | |  * down by 0xFEE0 into U+0021..U+007E. Every other character is kept as is. |
| | |  * |
| | |  * @param input text to convert |
| | |  * @return the converted text, same length as {@code input} |
| | |  */ |
| | | public static String ToDBC(String input) { |
| | |     final char[] chars = input.toCharArray(); |
| | |     for (int pos = 0; pos < chars.length; pos++) { |
| | |         final char ch = chars[pos]; |
| | |         if (ch == 0x3000) { |
| | |             chars[pos] = ' '; |
| | |         } else if (ch >= 0xFF01 && ch <= 0xFF5E) { |
| | |             chars[pos] = (char) (ch - 0xFEE0); |
| | |         } |
| | |     } |
| | |     return new String(chars); |
| | | } |
| | | |
| | | /** |
| | | * Converts the text to Traditional Chinese by delegating to {@code Translate.ToTraditionalChinese(text, 0)}. |
| | | * Type 0 is described on the two-argument overload as plain Traditional Chinese. |
| | | * |
| | | * @param text text to convert |
| | | * @return the value returned by {@code Translate.ToTraditionalChinese} |
| | | * @throws Exception declared in the signature; presumably propagated from {@code Translate} |
| | | */ |
| | | public static String ToTraditionalChinese(String text) throws Exception { |
| | | return Translate.ToTraditionalChinese(text, 0); |
| | | } |
| | | |
| | | /** |
| | | * Converts the text to Traditional Chinese by delegating to {@code Translate.ToTraditionalChinese(text, type)}. |
| | | * |
| | | * @param text text to convert |
| | | * @param type target variant: 0 = Traditional Chinese, 1 = Hong Kong / Macau Traditional, 2 = Taiwan standard |
| | | * @return the value returned by {@code Translate.ToTraditionalChinese} |
| | | * @throws Exception declared in the signature; presumably propagated from {@code Translate} |
| | | */ |
| | | public static String ToTraditionalChinese(String text, int type) throws Exception { |
| | | return Translate.ToTraditionalChinese(text, type); |
| | | } |
| | | |
| | | /** |
| | | * Converts the text to Simplified Chinese by delegating to {@code Translate.ToSimplifiedChinese(text, 0)}. |
| | | * Source type 0 is described on the two-argument overload as plain Traditional Chinese. |
| | | * |
| | | * @param text text to convert |
| | | * @return the value returned by {@code Translate.ToSimplifiedChinese} |
| | | * @throws Exception declared in the signature; presumably propagated from {@code Translate} |
| | | */ |
| | | public static String ToSimplifiedChinese(String text) throws Exception { |
| | | return Translate.ToSimplifiedChinese(text, 0); |
| | | } |
| | | |
| | | /** |
| | | * Converts the text to Simplified Chinese by delegating to {@code Translate.ToSimplifiedChinese(text, srcType)}. |
| | | * |
| | | * @param text text to convert |
| | | * @param srcType source variant: 0 = Traditional Chinese, 1 = Hong Kong / Macau Traditional, 2 = Taiwan standard |
| | | * @return the value returned by {@code Translate.ToSimplifiedChinese} |
| | | * @throws Exception declared in the signature; presumably propagated from {@code Translate} |
| | | */ |
| | | public static String ToSimplifiedChinese(String text, int srcType) throws Exception { |
| | | return Translate.ToSimplifiedChinese(text, srcType); |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springblade.modules.words.internals.BaseMatch; |
| | | import org.springblade.modules.words.internals.TrieNode3; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | |
| | | /** |
| | | * Text search and matching with support for a subset of regex syntax: . ? [ ] \ ( | ). Parentheses nested inside ( ) are not supported. |
| | | */ |
| | | public class WordsMatch extends BaseMatch { |
| | | |
| | | /** |
| | | * Scans the text from left to right and returns the first keyword match. |
| | | * <p> |
| | | * When the current node has no edge for a character and its {@code HasWildcard} flag is set, the private |
| | | * overload is tried on {@code WildcardNode} from the next position; if that finds nothing, the walk |
| | | * restarts from the first-level table. A hit is ignored when the recorded keyword length would put its |
| | | * start before index 0. |
| | | * |
| | | * @param text text to scan |
| | | * @return a result built from the matched substring, its start and end indexes, the keyword index and the |
| | | * matching keyword pattern; {@code null} when nothing matches |
| | | */ |
| | | public WordsSearchResult FindFirst(final String text) { |
| | | TrieNode3 ptr = null; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t = text.charAt(i); |
| | | |
| | | TrieNode3 tn; |
| | | if (ptr == null) { |
| | | tn = _first[t]; |
| | | } else { |
| | | if (ptr.HasKey(t) == false) { |
| | | if (ptr.HasWildcard) { |
| | | final WordsSearchResult result = FindFirst(text, i + 1, ptr.WildcardNode); |
| | | if (result != null) { |
| | | return result; |
| | | } |
| | | } |
| | | tn = _first[t]; |
| | | } else { |
| | | tn = ptr.GetValue(t); |
| | | } |
| | | } |
| | | if (tn != null) { |
| | | if (tn.End) { |
| | | final Integer r = tn.Results.get(0); |
| | | final int length = _keywordLength[r]; |
| | | final int start = i - length + 1; |
| | | if (start >= 0) { |
| | | final int kIndex = _keywordIndex[r]; |
| | | final String matchKeyword = _matchKeywords[kIndex]; |
| | | final String keyword = text.substring(start, i + 1); |
| | | return new WordsSearchResult(keyword, start, i, kIndex, matchKeyword); |
| | | } |
| | | } |
| | | } |
| | | ptr = tn; |
| | | } |
| | | return null; |
| | | } |
| | | |
| | | /** |
| | |  * Continues a first-match search from the given node, starting at {@code index}. |
| | |  * <p> |
| | |  * The walk ends at the first character the current node has no edge for; in that case the node's |
| | |  * {@code WildcardNode}, when {@code HasWildcard} is set, is tried from the following position. |
| | |  * |
| | |  * @param text  text being scanned |
| | |  * @param index position of the first character to examine |
| | |  * @param ptr   node to resume from |
| | |  * @return the first match found along this path, or {@code null} |
| | |  */ |
| | | private WordsSearchResult FindFirst(final String text, final int index, TrieNode3 ptr) { |
| | |     final int textLength = text.length(); |
| | |     for (int pos = index; pos < textLength; pos++) { |
| | |         final char ch = text.charAt(pos); |
| | |         if (!ptr.HasKey(ch)) { |
| | |             return ptr.HasWildcard ? FindFirst(text, pos + 1, ptr.WildcardNode) : null; |
| | |         } |
| | | |
| | |         final TrieNode3 node = ptr.GetValue(ch); |
| | |         if (node.End) { |
| | |             final Integer hit = node.Results.get(0); |
| | |             final int from = pos - _keywordLength[hit] + 1; |
| | |             if (from >= 0) { |
| | |                 final int keywordId = _keywordIndex[hit]; |
| | |                 final String pattern = _matchKeywords[keywordId]; |
| | |                 final String matched = text.substring(from, pos + 1); |
| | |                 return new WordsSearchResult(matched, from, pos, keywordId, pattern); |
| | |             } |
| | |         } |
| | |         ptr = node; |
| | |     } |
| | |     return null; |
| | | } |
| | | |
| | | /** |
| | | * Scans the text from left to right and collects every keyword match. |
| | | * <p> |
| | | * When the current node has no edge for a character and its {@code HasWildcard} flag is set, the private |
| | | * overload first adds the matches reachable through {@code WildcardNode} from the next position; the walk |
| | | * then restarts from the first-level table. A hit is skipped when the recorded keyword length would put |
| | | * its start before index 0. |
| | | * |
| | | * @param text text to scan |
| | | * @return the matches in the order they were found; empty when nothing matches |
| | | */ |
| | | public List<WordsSearchResult> FindAll(final String text) { |
| | | TrieNode3 ptr = null; |
| | | final List<WordsSearchResult> result = new ArrayList<WordsSearchResult>(); |
| | | |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t = text.charAt(i); |
| | | TrieNode3 tn; |
| | | if (ptr == null) { |
| | | tn = _first[t]; |
| | | } else { |
| | | if (ptr.HasKey(t) == false) { |
| | | if (ptr.HasWildcard) { |
| | | FindAll(text, i + 1, ptr.WildcardNode, result); |
| | | } |
| | | tn = _first[t]; |
| | | } else { |
| | | tn = ptr.GetValue(t); |
| | | } |
| | | } |
| | | if (tn != null) { |
| | | if (tn.End) { |
| | | for (final Integer r : tn.Results) { |
| | | final int length = _keywordLength[r]; |
| | | final int start = i - length + 1; |
| | | if (start >= 0) { |
| | | final int kIndex = _keywordIndex[r]; |
| | | final String matchKeyword = _matchKeywords[kIndex]; |
| | | final String keyword = text.substring(start, i + 1); |
| | | final WordsSearchResult wr = new WordsSearchResult(keyword, start, i, kIndex, matchKeyword); |
| | | result.add(wr); |
| | | } |
| | | } |
| | | } |
| | | } |
| | | ptr = tn; |
| | | } |
| | | return result; |
| | | } |
| | | |
| | | /** |
| | |  * Continues a collect-all search from the given node, starting at {@code index}. |
| | |  * <p> |
| | |  * Matches are appended to {@code result}. The walk ends at the first character the current node has no |
| | |  * edge for; in that case the node's {@code WildcardNode}, when {@code HasWildcard} is set, is tried |
| | |  * from the following position before returning. |
| | |  * |
| | |  * @param text   text being scanned |
| | |  * @param index  position of the first character to examine |
| | |  * @param ptr    node to resume from |
| | |  * @param result list that receives the matches |
| | |  */ |
| | | private void FindAll(final String text, final int index, TrieNode3 ptr, final List<WordsSearchResult> result) { |
| | |     final int textLength = text.length(); |
| | |     for (int pos = index; pos < textLength; pos++) { |
| | |         final char ch = text.charAt(pos); |
| | |         if (!ptr.HasKey(ch)) { |
| | |             if (ptr.HasWildcard) { |
| | |                 FindAll(text, pos + 1, ptr.WildcardNode, result); |
| | |             } |
| | |             return; |
| | |         } |
| | | |
| | |         final TrieNode3 node = ptr.GetValue(ch); |
| | |         if (node.End) { |
| | |             for (final Integer hit : node.Results) { |
| | |                 final int from = pos - _keywordLength[hit] + 1; |
| | |                 if (from < 0) { |
| | |                     continue; |
| | |                 } |
| | |                 final int keywordId = _keywordIndex[hit]; |
| | |                 final String pattern = _matchKeywords[keywordId]; |
| | |                 final String matched = text.substring(from, pos + 1); |
| | |                 result.add(new WordsSearchResult(matched, from, pos, keywordId, pattern)); |
| | |             } |
| | |         } |
| | |         ptr = node; |
| | |     } |
| | | } |
| | | |
| | | /** |
| | | * Tells whether the text contains at least one keyword match. |
| | | * <p> |
| | | * When the current node has no edge for a character and its {@code HasWildcard} flag is set, the private |
| | | * overload is tried on {@code WildcardNode} from the next position; if that finds nothing, the walk |
| | | * restarts from the first-level table. A hit is ignored when the recorded keyword length would put its |
| | | * start before index 0. |
| | | * |
| | | * @param text text to scan |
| | | * @return {@code true} as soon as a match is found, otherwise {@code false} |
| | | */ |
| | | public boolean ContainsAny(final String text) { |
| | | TrieNode3 ptr = null; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t = text.charAt(i); |
| | | TrieNode3 tn; |
| | | if (ptr == null) { |
| | | tn = _first[t]; |
| | | } else { |
| | | if (ptr.HasKey(t) == false) { |
| | | if (ptr.HasWildcard) { |
| | | final boolean result = ContainsAny(text, i + 1, ptr.WildcardNode); |
| | | if (result) { |
| | | return true; |
| | | } |
| | | } |
| | | tn = _first[t]; |
| | | } else { |
| | | tn = ptr.GetValue(t); |
| | | } |
| | | } |
| | | if (tn != null) { |
| | | if (tn.End) { |
| | | final int length = _keywordLength[tn.Results.get(0)]; |
| | | final int s = i - length + 1; |
| | | if (s >= 0) { |
| | | return true; |
| | | } |
| | | } |
| | | } |
| | | ptr = tn; |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | /** |
| | |  * Continues a containment check from the given node, starting at {@code index}. |
| | |  * <p> |
| | |  * The walk ends at the first character the current node has no edge for; in that case the answer is |
| | |  * whatever the node's {@code WildcardNode} yields from the following position, or {@code false} when |
| | |  * {@code HasWildcard} is not set. |
| | |  * |
| | |  * @param text  text being scanned |
| | |  * @param index position of the first character to examine |
| | |  * @param ptr   node to resume from |
| | |  * @return {@code true} if a match is found along this path |
| | |  */ |
| | | private boolean ContainsAny(final String text, final int index, TrieNode3 ptr) { |
| | |     final int textLength = text.length(); |
| | |     for (int pos = index; pos < textLength; pos++) { |
| | |         final char ch = text.charAt(pos); |
| | |         if (!ptr.HasKey(ch)) { |
| | |             return ptr.HasWildcard && ContainsAny(text, pos + 1, ptr.WildcardNode); |
| | |         } |
| | | |
| | |         final TrieNode3 node = ptr.GetValue(ch); |
| | |         if (node.End) { |
| | |             final int from = pos - _keywordLength[node.Results.get(0)] + 1; |
| | |             if (from >= 0) { |
| | |                 return true; |
| | |             } |
| | |         } |
| | |         ptr = node; |
| | |     } |
| | |     return false; |
| | | } |
| | | |
| | | /** |
| | | * Replaces every keyword match in the text, using {@code '*'} as the replacement character. |
| | | * Delegates to {@link #Replace(String, char)}. |
| | | * |
| | | * @param text text to scan |
| | | * @return the result of {@code Replace(text, '*')} |
| | | */ |
| | | public String Replace(final String text) { |
| | | return Replace(text, '*'); |
| | | } |
| | | |
| | | /** |
| | | * Replaces every keyword match in the text with the given character. |
| | | * <p> |
| | | * The text is copied into a {@code StringBuilder} and matched spans are overwritten there. When the |
| | | * current node has no edge for a character and its {@code HasWildcard} flag is set, the private overload |
| | | * first overwrites the matches reachable through {@code WildcardNode} from the next position; the walk |
| | | * then restarts from the first-level table. The overwritten span is as long as the length recorded for |
| | | * the first result of the node, and is skipped when it would start before index 0. |
| | | * |
| | | * @param text text to scan |
| | | * @param replaceChar character written over each matched span |
| | | * @return the copy of the text after all matched spans have been overwritten |
| | | */ |
| | | public String Replace(final String text, final char replaceChar) { |
| | | final StringBuilder result = new StringBuilder(text); |
| | | |
| | | TrieNode3 ptr = null; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t = text.charAt(i); |
| | | TrieNode3 tn; |
| | | if (ptr == null) { |
| | | tn = _first[t]; |
| | | } else { |
| | | if (ptr.HasKey(t) == false) { |
| | | if (ptr.HasWildcard) { |
| | | Replace(text, i + 1, ptr.WildcardNode, replaceChar, result); |
| | | } |
| | | tn = _first[t]; |
| | | } else { |
| | | tn = ptr.GetValue(t); |
| | | } |
| | | } |
| | | if (tn != null) { |
| | | if (tn.End) { |
| | | final int maxLength = _keywordLength[tn.Results.get(0)]; |
| | | final int start = i + 1 - maxLength; |
| | | if (start >= 0) { |
| | | for (int j = start; j <= i; j++) { |
| | | result.setCharAt(j, replaceChar); |
| | | } |
| | | } |
| | | } |
| | | } |
| | | ptr = tn; |
| | | } |
| | | return result.toString(); |
| | | } |
| | | |
| | | /** |
| | |  * Continues a replacement pass from the given node, starting at {@code index}. |
| | |  * <p> |
| | |  * Matched spans are overwritten in {@code result}. The walk ends at the first character the current |
| | |  * node has no edge for; in that case the node's {@code WildcardNode}, when {@code HasWildcard} is set, |
| | |  * is tried from the following position before returning. |
| | |  * |
| | |  * @param text        text being scanned |
| | |  * @param index       position of the first character to examine |
| | |  * @param ptr         node to resume from |
| | |  * @param replaceChar character written over each matched span |
| | |  * @param result      buffer that receives the replacements |
| | |  */ |
| | | private void Replace(final String text, final int index, TrieNode3 ptr, final char replaceChar, |
| | |         final StringBuilder result) { |
| | |     final int textLength = text.length(); |
| | |     for (int pos = index; pos < textLength; pos++) { |
| | |         final char ch = text.charAt(pos); |
| | |         if (!ptr.HasKey(ch)) { |
| | |             if (ptr.HasWildcard) { |
| | |                 Replace(text, pos + 1, ptr.WildcardNode, replaceChar, result); |
| | |             } |
| | |             return; |
| | |         } |
| | | |
| | |         final TrieNode3 node = ptr.GetValue(ch); |
| | |         if (node.End) { |
| | |             final int from = pos + 1 - _keywordLength[node.Results.get(0)]; |
| | |             if (from >= 0) { |
| | |                 for (int k = from; k <= pos; k++) { |
| | |                     result.setCharAt(k, replaceChar); |
| | |                 } |
| | |             } |
| | |         } |
| | |         ptr = node; |
| | |     } |
| | | } |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springblade.modules.words.internals.BaseMatchEx; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | |
| | | public class WordsMatchEx extends BaseMatchEx { |
| | | |
| | | /// <summary> |
| | | /// Scans the text from left to right and returns the first keyword match. A character whose dictionary code is 0 resets the state; when the current state has no transition for a character and its wildcard entry is positive, the private overload is tried from the next position before restarting from the first-level table. A hit whose recorded keyword length would start before index 0 is ignored. |
| | | /// </summary> |
| | | /// <param name="text">text to scan</param> |
| | | /// <returns>a result built from the matched substring, its start and end indexes, the result index and the matching keyword pattern; null when nothing matches</returns> |
| | | public WordsSearchResult FindFirst(final String text) { |
| | | int p = 0; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | final int t = _dict[t1]; |
| | | |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index = _nextIndex[p].IndexOf(t); |
| | | if (index == -1) { |
| | | if (_wildcard[p] > 0) { |
| | | final WordsSearchResult r = FindFirst(text, i + 1, _wildcard[p]); |
| | | if (r != null) { |
| | | return r; |
| | | } |
| | | } |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index); |
| | | } |
| | | } |
| | | if (next != 0) { |
| | | final int start = _end[next]; |
| | | if (start < _end[next + 1]) { |
| | | final int length = _keywordLength[_resultIndex[start]]; |
| | | final int s = i - length + 1; |
| | | if (s >= 0) { |
| | | final String key = text.substring(s, i + 1); |
| | | final int index = _resultIndex[start]; |
| | | final String matchKeyword = _matchKeywords[index]; |
| | | return new WordsSearchResult(key, i + 1 - key.length(), i, index, matchKeyword); |
| | | } |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | return null; |
| | | } |
| | | |
| | | /** |
| | |  * Continues a first-match search from the given state, starting at {@code index}. |
| | |  * <p> |
| | |  * The walk ends at the first character whose dictionary code is 0, or at the first character the |
| | |  * current state has no transition for; in the latter case the state's wildcard entry, when positive, |
| | |  * is tried from the following position. A character outside the state's [min, max] code range |
| | |  * restarts from the first-level table. |
| | |  * |
| | |  * @param text  text being scanned |
| | |  * @param index position of the first character to examine |
| | |  * @param p     state to resume from |
| | |  * @return the first match found along this path, or {@code null} |
| | |  */ |
| | | private WordsSearchResult FindFirst(final String text, final int index, int p) { |
| | |     final int textLength = text.length(); |
| | |     for (int pos = index; pos < textLength; pos++) { |
| | |         final int code = _dict[text.charAt(pos)]; |
| | |         if (code == 0) { |
| | |             return null; |
| | |         } |
| | | |
| | |         final int target; |
| | |         if (p == 0 || code < _min[p] || code > _max[p]) { |
| | |             target = _firstIndex[code]; |
| | |         } else { |
| | |             final int slot = _nextIndex[p].IndexOf(code); |
| | |             if (slot == -1) { |
| | |                 final int wildcardState = _wildcard[p]; |
| | |                 return wildcardState > 0 ? FindFirst(text, pos + 1, wildcardState) : null; |
| | |             } |
| | |             target = _nextIndex[p].GetValue(slot); |
| | |         } |
| | | |
| | |         final int firstResult = _end[target]; |
| | |         if (firstResult < _end[target + 1]) { |
| | |             final int from = pos - _keywordLength[_resultIndex[firstResult]] + 1; |
| | |             if (from >= 0) { |
| | |                 final String matched = text.substring(from, pos + 1); |
| | |                 final int resultId = _resultIndex[firstResult]; |
| | |                 final String pattern = _matchKeywords[resultId]; |
| | |                 // The substring spans from..pos, so its start index equals pos + 1 - matched.length(). |
| | |                 return new WordsSearchResult(matched, from, pos, resultId, pattern); |
| | |             } |
| | |         } |
| | |         p = target; |
| | |     } |
| | |     return null; |
| | | } |
| | | |
| | | /// <summary> |
| | | /// 在文本中查找所有的关键字 |
| | | /// </summary> |
| | | /// <param name="text">文本</param> |
| | | /// <returns></returns> |
| | | public List<WordsSearchResult> FindAll(final String text) { |
| | | final List<WordsSearchResult> result = new ArrayList<WordsSearchResult>(); |
| | | int p = 0; |
| | | |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | |
| | | final int t = _dict[t1]; |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index2 = _nextIndex[p].IndexOf(t); |
| | | if (index2 == -1) { |
| | | if (_wildcard[p] > 0) { |
| | | FindAll(text, i + 1, _wildcard[p], result); |
| | | } |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index2); |
| | | } |
| | | } |
| | | |
| | | if (next != 0) { |
| | | for (int j = _end[next]; j < _end[next + 1]; j++) { |
| | | final int length = _keywordLength[_resultIndex[j]]; |
| | | final int s = i - length + 1; |
| | | if (s >= 0) { |
| | | final int kIndex = _keywordIndex[j]; |
| | | final String matchKeyword = _matchKeywords[kIndex]; |
| | | final String key = text.substring(s, i + 1); |
| | | final WordsSearchResult r = new WordsSearchResult(key, s, i, kIndex, matchKeyword); |
| | | result.add(r); |
| | | } |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | return result; |
| | | } |
| | | |
| | | private void FindAll(final String text, final int index, int p, final List<WordsSearchResult> result) { |
| | | for (int i = index; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | final int t = _dict[t1]; |
| | | if (t == 0) { |
| | | return; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index2 = _nextIndex[p].IndexOf(t); |
| | | if (index2 == -1) { |
| | | if (_wildcard[p] > 0) { |
| | | FindAll(text, i + 1, _wildcard[p], result); |
| | | } |
| | | return; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index2); |
| | | } |
| | | } |
| | | |
| | | for (int j = _end[next]; j < _end[next + 1]; j++) { |
| | | final int length = _keywordLength[_resultIndex[j]]; |
| | | final int s = i - length + 1; |
| | | if (s >= 0) { |
| | | final int kIndex = _keywordIndex[j]; |
| | | final String matchKeyword = _matchKeywords[kIndex]; |
| | | final String key = text.substring(s, i + 1); |
| | | final WordsSearchResult r = new WordsSearchResult(key, s, i, kIndex, matchKeyword); |
| | | result.add(r); |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | } |
| | | |
| | | /// <summary> |
| | | /// 判断文本是否包含关键字 |
| | | /// </summary> |
| | | /// <param name="text">文本</param> |
| | | /// <returns></returns> |
| | | public boolean ContainsAny(final String text) { |
| | | int p = 0; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | final int t = _dict[t1]; |
| | | |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index = _nextIndex[p].IndexOf(t); |
| | | if (index == -1) { |
| | | if (_wildcard[p] > 0) { |
| | | final boolean r = ContainsAny(text, i + 1, _wildcard[p]); |
| | | if (r) { |
| | | return true; |
| | | } |
| | | } |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index); |
| | | } |
| | | } |
| | | |
| | | if (next != 0) { |
| | | if (_end[next] < _end[next + 1]) { |
| | | return true; |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | private boolean ContainsAny(final String text, final int index, int p) { |
| | | for (int i = index; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | |
| | | final int t = _dict[t1]; |
| | | if (t == 0) { |
| | | return false; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index2 = _nextIndex[p].IndexOf(t); |
| | | if (index2 == -1) { |
| | | if (_wildcard[p] > 0) { |
| | | final boolean r = ContainsAny(text, i + 1, _wildcard[p]); |
| | | if (r) { |
| | | return true; |
| | | } |
| | | } |
| | | return false; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index2); |
| | | } |
| | | } |
| | | |
| | | final int start = _end[next]; |
| | | if (start < _end[next + 1]) { |
| | | final int length = _keywordLength[_resultIndex[start]]; |
| | | final int s = i - length + 1; |
| | | if (s >= 0) { |
| | | return true; |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | /// <summary> |
| | | /// 在文本中替换所有的关键字 |
| | | /// </summary> |
| | | /// <param name="text">文本</param> |
| | | /// <param name="replaceChar">替换符</param> |
| | | /// <returns></returns> |
| | | public String Replace(final String text, final char replaceChar) { |
| | | final StringBuilder result = new StringBuilder(text); |
| | | |
| | | int p = 0; |
| | | |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | final int t = _dict[t1]; |
| | | |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index2 = _nextIndex[p].IndexOf(t); |
| | | if (index2 == -1) { |
| | | if (_wildcard[p] > 0) { |
| | | Replace(text, i + 1, _wildcard[p], replaceChar, result); |
| | | } |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index2); |
| | | } |
| | | } |
| | | |
| | | if (next != 0) { |
| | | final int start = _end[next]; |
| | | if (start < _end[next + 1]) { |
| | | final int maxLength = _keywordLength[_resultIndex[start]]; |
| | | final int start2 = i + 1 - maxLength; |
| | | if (start2 >= 0) { |
| | | for (int j = start2; j <= i; j++) { |
| | | result.setCharAt(j, replaceChar); |
| | | } |
| | | } |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | return result.toString(); |
| | | } |
| | | |
| | | private void Replace(final String text, final int index, int p, final char replaceChar, |
| | | final StringBuilder result) { |
| | | for (int i = index; i < text.length(); i++) { |
| | | final char t1 = text.charAt(i); |
| | | |
| | | final int t = _dict[t1]; |
| | | if (t == 0) { |
| | | return; |
| | | } |
| | | int next; |
| | | if (p == 0 || t < _min[p] || t > _max[p]) { |
| | | next = _firstIndex[t]; |
| | | } else { |
| | | final int index2 = _nextIndex[p].IndexOf(t); |
| | | if (index2 == -1) { |
| | | if (_wildcard[p] > 0) { |
| | | Replace(text, i + 1, _wildcard[p], replaceChar, result); |
| | | } |
| | | return; |
| | | } else { |
| | | next = _nextIndex[p].GetValue(index2); |
| | | } |
| | | } |
| | | |
| | | final int start = _end[next]; |
| | | if (start < _end[next + 1]) { |
| | | final int maxLength = _keywordLength[_resultIndex[start]]; |
| | | final int start2 = i + 1 - maxLength; |
| | | if (start2 >= 0) { |
| | | for (int j = start2; j <= i; j++) { |
| | | result.setCharAt(j, replaceChar); |
| | | } |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springblade.modules.words.internals.BaseSearch; |
| | | import org.springblade.modules.words.internals.TrieNode2; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | |
| | | public class WordsSearch extends BaseSearch { |
| | | public String[] _others; |
| | | |
| | | /** |
| | | * 在文本中查找第一个关键字 |
| | | * |
| | | * @param text 文本 |
| | | * @return |
| | | */ |
| | | public WordsSearchResult FindFirst(final String text) { |
| | | TrieNode2 ptr = null; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t = text.charAt(i); |
| | | TrieNode2 tn = null; |
| | | if (ptr == null) { |
| | | tn = _first[t]; |
| | | } else { |
| | | if (ptr.HasKey(t) == false) { |
| | | tn = _first[t]; |
| | | } else { |
| | | tn = ptr.GetValue(t); |
| | | } |
| | | } |
| | | if (tn != null) { |
| | | if (tn.End) { |
| | | for (final Integer index : tn.Results) { |
| | | final String key = _keywords[index]; |
| | | return new WordsSearchResult(key, i + 1 - key.length(), i, index); |
| | | } |
| | | } |
| | | } |
| | | ptr = tn; |
| | | } |
| | | return null; |
| | | } |
| | | |
| | | /** |
| | | * 在文本中查找所有的关键字 |
| | | * |
| | | * @param text 文本 |
| | | * @return |
| | | */ |
| | | public List<WordsSearchResult> FindAll(final String text) { |
| | | TrieNode2 ptr = null; |
| | | final List<WordsSearchResult> list = new ArrayList<WordsSearchResult>(); |
| | | |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t = text.charAt(i); |
| | | TrieNode2 tn = null; |
| | | if (ptr == null) { |
| | | tn = _first[t]; |
| | | } else { |
| | | if (ptr.HasKey(t) == false) { |
| | | tn = _first[t]; |
| | | } else { |
| | | tn = ptr.GetValue(t); |
| | | } |
| | | } |
| | | if (tn != null) { |
| | | if (tn.End) { |
| | | for (final Integer index : tn.Results) { |
| | | final String key = _keywords[index]; |
| | | final WordsSearchResult item = new WordsSearchResult(key, i + 1 - key.length(), i, index); |
| | | list.add(item); |
| | | } |
| | | } |
| | | } |
| | | ptr = tn; |
| | | } |
| | | return list; |
| | | } |
| | | |
| | | /** |
| | | * 判断文本是否包含关键字 |
| | | * |
| | | * @param text 文本 |
| | | * @return |
| | | */ |
| | | public boolean ContainsAny(final String text) { |
| | | TrieNode2 ptr = null; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t = text.charAt(i); |
| | | TrieNode2 tn = null; |
| | | if (ptr == null) { |
| | | tn = _first[t]; |
| | | } else { |
| | | if (ptr.HasKey(t) == false) { |
| | | tn = _first[t]; |
| | | } else { |
| | | tn = ptr.GetValue(t); |
| | | } |
| | | } |
| | | if (tn != null) { |
| | | if (tn.End) { |
| | | return true; |
| | | } |
| | | } |
| | | ptr = tn; |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | /** |
| | | * 在文本中替换所有的关键字, 替换符默认为 * |
| | | * |
| | | * @param text 文本 |
| | | * @return |
| | | */ |
| | | public String Replace(final String text) { |
| | | return Replace(text, '*'); |
| | | } |
| | | |
| | | /** |
| | | * 在文本中替换所有的关键字 |
| | | * |
| | | * @param text 文本 |
| | | * @param replaceChar 替换符 |
| | | * @return |
| | | */ |
| | | public String Replace(final String text, final char replaceChar) { |
| | | final StringBuilder result = new StringBuilder(text); |
| | | |
| | | TrieNode2 ptr = null; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final char t = text.charAt(i); |
| | | TrieNode2 tn = null; |
| | | if (ptr == null) { |
| | | tn = _first[t]; |
| | | } else { |
| | | if (ptr.HasKey(t) == false) { |
| | | tn = _first[t]; |
| | | } else { |
| | | tn = ptr.GetValue(t); |
| | | } |
| | | } |
| | | if (tn != null) { |
| | | if (tn.End) { |
| | | final int maxLength = _keywords[tn.Results.get(0)].length(); |
| | | final int start = i + 1 - maxLength; |
| | | for (int j = start; j <= i; j++) { |
| | | result.setCharAt(j, replaceChar); |
| | | } |
| | | } |
| | | } |
| | | ptr = tn; |
| | | } |
| | | return result.toString(); |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springblade.modules.words.internals.BaseSearchEx; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | |
| | | public class WordsSearchEx extends BaseSearchEx { |
| | |     /** |
| | |      * Finds every keyword occurrence in the text. |
| | |      * |
| | |      * @param text text to scan |
| | |      * @return all matches in scan order; empty when there are none |
| | |      */ |
| | |     public List<WordsSearchResult> FindAll(final String text) { |
| | |         final List<WordsSearchResult> found = new ArrayList<WordsSearchResult>(); |
| | |         int state = 0; |
| | |         for (int pos = 0; pos < text.length(); pos++) { |
| | |             final int code = _dict[text.charAt(pos)]; |
| | |             if (code == 0) { |
| | |                 // Code 0 resets the scan to the start state. |
| | |                 state = 0; |
| | |                 continue; |
| | |             } |
| | |             state = nextState(state, code); |
| | |             if (state == 0) { |
| | |                 continue; |
| | |             } |
| | |             final int stop = _end[state + 1]; |
| | |             for (int j = _end[state]; j < stop; j++) { |
| | |                 final int index = _resultIndex[j]; |
| | |                 final String key = _keywords[index]; |
| | |                 found.add(new WordsSearchResult(key, pos + 1 - key.length(), pos, index)); |
| | |             } |
| | |         } |
| | |         return found; |
| | |     } |
| | | |
| | |     /** |
| | |      * Finds the first keyword occurrence in the text. |
| | |      * |
| | |      * @param text text to scan |
| | |      * @return the first match, or null when no keyword occurs |
| | |      */ |
| | |     public WordsSearchResult FindFirst(final String text) { |
| | |         int state = 0; |
| | |         for (int pos = 0; pos < text.length(); pos++) { |
| | |             final int code = _dict[text.charAt(pos)]; |
| | |             if (code == 0) { |
| | |                 state = 0; |
| | |                 continue; |
| | |             } |
| | |             state = nextState(state, code); |
| | |             if (state == 0) { |
| | |                 continue; |
| | |             } |
| | |             final int first = _end[state]; |
| | |             if (first < _end[state + 1]) { |
| | |                 final int index = _resultIndex[first]; |
| | |                 final String key = _keywords[index]; |
| | |                 return new WordsSearchResult(key, pos + 1 - key.length(), pos, index); |
| | |             } |
| | |         } |
| | |         return null; |
| | |     } |
| | | |
| | |     /** |
| | |      * Tells whether the text contains any keyword. |
| | |      * |
| | |      * @param text text to scan |
| | |      * @return true as soon as a state carrying results is reached, false otherwise |
| | |      */ |
| | |     public boolean ContainsAny(final String text) { |
| | |         int state = 0; |
| | |         for (int pos = 0; pos < text.length(); pos++) { |
| | |             final int code = _dict[text.charAt(pos)]; |
| | |             if (code == 0) { |
| | |                 state = 0; |
| | |                 continue; |
| | |             } |
| | |             state = nextState(state, code); |
| | |             if (state != 0 && _end[state] < _end[state + 1]) { |
| | |                 return true; |
| | |             } |
| | |         } |
| | |         return false; |
| | |     } |
| | | |
| | |     /** |
| | |      * Replaces every keyword in the text, using '*' as the replacement character. |
| | |      * |
| | |      * @param text text to scan |
| | |      * @return the masked text |
| | |      */ |
| | |     public String Replace(final String text) { |
| | |         return Replace(text, '*'); |
| | |     } |
| | | |
| | |     /** |
| | |      * Replaces every keyword in the text with the given character. |
| | |      * |
| | |      * @param text        text to scan |
| | |      * @param replaceChar character written over each matched position |
| | |      * @return the masked text |
| | |      */ |
| | |     public String Replace(final String text, final char replaceChar) { |
| | |         final StringBuilder masked = new StringBuilder(text); |
| | |         int state = 0; |
| | |         for (int pos = 0; pos < text.length(); pos++) { |
| | |             final int code = _dict[text.charAt(pos)]; |
| | |             if (code == 0) { |
| | |                 state = 0; |
| | |                 continue; |
| | |             } |
| | |             state = nextState(state, code); |
| | |             if (state == 0) { |
| | |                 continue; |
| | |             } |
| | |             final int first = _end[state]; |
| | |             if (first < _end[state + 1]) { |
| | |                 // The first result of the state decides how far back the mask reaches. |
| | |                 final int maxLength = _keywords[_resultIndex[first]].length(); |
| | |                 int j = pos + 1 - maxLength; |
| | |                 while (j <= pos) { |
| | |                     masked.setCharAt(j, replaceChar); |
| | |                     j++; |
| | |                 } |
| | |             } |
| | |         } |
| | |         return masked.toString(); |
| | |     } |
| | | |
| | |     /** |
| | |      * Performs one transition: uses the edge for code out of state when one exists, otherwise |
| | |      * falls back to the _first table. |
| | |      */ |
| | |     private int nextState(final int state, final int code) { |
| | |         if (state == 0 || code < _min[state] || code > _max[state]) { |
| | |             return _first[code]; |
| | |         } |
| | |         final int slot = _nextIndex[state].IndexOf(code); |
| | |         return slot == -1 ? _first[code] : _nextIndex[state].GetValue(slot); |
| | |     } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | import org.springblade.modules.words.internals.BaseSearchEx2; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | |
| | | |
| | | public class WordsSearchEx2 extends BaseSearchEx2 { |
| | | /** |
| | | * 在文本中查找所有的关键字 |
| | | * @param text 文本 |
| | | * @return |
| | | */ |
| | | public List<WordsSearchResult> FindAll(final String text) { |
| | | final List<WordsSearchResult> root = new ArrayList<WordsSearchResult>(); |
| | | int p = 0; |
| | | final int length = text.length(); |
| | | for (int i = 0; i < length; i++) { |
| | | final int t = _dict[text.charAt(i)]; |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next = _next[p] + t; |
| | | boolean find = _key[next] == t; |
| | | if (find == false && p != 0) { |
| | | p = 0; |
| | | next = _next[0] + t; |
| | | find = _key[next] == t; |
| | | } |
| | | if (find) { |
| | | final int index = _check[next]; |
| | | if (index > 0) { |
| | | for (final int item : _guides[index]) { |
| | | final String key = _keywords[item]; |
| | | final WordsSearchResult r = new WordsSearchResult(key, i + 1 - key.length(), i, item); |
| | | root.add(r); |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | } |
| | | return root; |
| | | } |
| | | |
| | | /** |
| | | * 在文本中查找第一个关键字 |
| | | * |
| | | * @param text 文本 |
| | | * @return |
| | | */ |
| | | public WordsSearchResult FindFirst(final String text) { |
| | | int p = 0; |
| | | final int length = text.length(); |
| | | for (int i = 0; i < length; i++) { |
| | | final int t = _dict[text.charAt(i)]; |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next = _next[p] + t; |
| | | if (_key[next] == t) { |
| | | final int index = _check[next]; |
| | | if (index > 0) { |
| | | final String item = _keywords[_guides[index][0]]; |
| | | return new WordsSearchResult(item, i + 1 - item.length(), i, _guides[index][0]); |
| | | } |
| | | p = next; |
| | | } else { |
| | | p = 0; |
| | | next = _next[p] + t; |
| | | if (_key[next] == t) { |
| | | final int index = _check[next]; |
| | | if (index > 0) { |
| | | final String item = _keywords[_guides[index][0]]; |
| | | return new WordsSearchResult(item, i + 1 - item.length(), i, _guides[index][0]); |
| | | } |
| | | p = next; |
| | | } |
| | | } |
| | | } |
| | | return null; |
| | | } |
| | | |
| | | /** |
| | | * 判断文本是否包含关键字 |
| | | * |
| | | * @param text 文本 |
| | | * @return |
| | | */ |
| | | public boolean ContainsAny(final String text) { |
| | | int p = 0; |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final int t = _dict[text.charAt(i)]; |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next = _next[p] + t; |
| | | if (_key[next] == t) { |
| | | if (_check[next] > 0) { |
| | | return true; |
| | | } |
| | | p = next; |
| | | } else { |
| | | p = 0; |
| | | next = _next[p] + t; |
| | | if (_key[next] == t) { |
| | | if (_check[next] > 0) { |
| | | return true; |
| | | } |
| | | p = next; |
| | | } |
| | | } |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | /** |
| | | * 在文本中替换所有的关键字, 替换符默认为 * |
| | | * |
| | | * @param text 文本 |
| | | * @return |
| | | */ |
| | | public String Replace(final String text) { |
| | | return Replace(text, '*'); |
| | | } |
| | | |
| | | /** |
| | | * 在文本中替换所有的关键字 |
| | | * |
| | | * @param text 文本 |
| | | * @param replaceChar 替换符 |
| | | * @return |
| | | */ |
| | | public String Replace(final String text, final char replaceChar) { |
| | | final StringBuilder result = new StringBuilder(text); |
| | | |
| | | int p = 0; |
| | | |
| | | for (int i = 0; i < text.length(); i++) { |
| | | final int t = _dict[text.charAt(i)]; |
| | | if (t == 0) { |
| | | p = 0; |
| | | continue; |
| | | } |
| | | int next = _next[p] + t; |
| | | boolean find = _key[next] == t; |
| | | if (find == false && p != 0) { |
| | | p = 0; |
| | | next = _next[p] + t; |
| | | find = _key[next] == t; |
| | | } |
| | | if (find) { |
| | | final int index = _check[next]; |
| | | if (index > 0) { |
| | | final int maxLength = _keywords[_guides[index][0]].length(); |
| | | final int start = i + 1 - maxLength; |
| | | for (int j = start; j <= i; j++) { |
| | | result.setCharAt(j, replaceChar); |
| | | } |
| | | } |
| | | p = next; |
| | | } |
| | | } |
| | | return result.toString(); |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words; |
| | | |
| | | /** One keyword occurrence reported by the search classes. */ |
| | | public class WordsSearchResult { |
| | | |
| | |     /** Start position of the match. */ |
| | |     public int Start; |
| | |     /** End position of the match. */ |
| | |     public int End; |
| | |     /** Matched text. */ |
| | |     public String Keyword; |
| | |     /** Index of the keyword. */ |
| | |     public int Index; |
| | |     /** Keyword pattern that produced the match. */ |
| | |     public String MatchKeyword; |
| | | |
| | |     /** |
| | |      * Creates a result whose match keyword is the keyword itself. |
| | |      * |
| | |      * @param keyword matched text |
| | |      * @param start   start position |
| | |      * @param end     end position |
| | |      * @param index   keyword index |
| | |      */ |
| | |     public WordsSearchResult(final String keyword, final int start, final int end, final int index) { |
| | |         this(keyword, start, end, index, keyword); |
| | |     } |
| | | |
| | |     /** |
| | |      * Creates a result with an explicit match keyword. |
| | |      * |
| | |      * @param keyword      matched text |
| | |      * @param start        start position |
| | |      * @param end          end position |
| | |      * @param index        keyword index |
| | |      * @param matchKeyword keyword pattern that produced the match |
| | |      */ |
| | |     public WordsSearchResult(final String keyword, final int start, final int end, final int index, |
| | |             final String matchKeyword) { |
| | |         this.Keyword = keyword; |
| | |         this.Start = start; |
| | |         this.End = end; |
| | |         this.Index = index; |
| | |         this.MatchKeyword = matchKeyword; |
| | |     } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.Hashtable; |
| | | import java.util.List; |
| | | import java.util.Map; |
| | | |
| | | public class BaseMatch { |
| | | protected TrieNode3[] _first; |
| | | protected int[] _keywordLength; |
| | | protected int[] _keywordIndex; |
| | | protected String[] _matchKeywords; |
| | | |
| | | protected List<TrieNode> BuildFirstLayerTrieNode(List<String> keywords) { |
| | | TrieNode root = new TrieNode(); |
| | | |
| | | Map<Integer, List<TrieNode>> allNodeLayers = new Hashtable<Integer, List<TrieNode>>(); |
| | | // 第一次关键字 |
| | | for (int i = 0; i < keywords.size(); i++) { |
| | | String p = keywords.get(i); |
| | | TrieNode nd = root; |
| | | int start = 0; |
| | | while (p.charAt(start) == 0) { // 0 为 通配符 |
| | | start++; |
| | | } |
| | | for (int j = start; j < p.length(); j++) { |
| | | nd = nd.Add(p.charAt(j)); |
| | | if (nd.Layer == 0) { |
| | | nd.Layer = j + 1 - start; |
| | | if (allNodeLayers.containsKey(nd.Layer) == false) { |
| | | List<TrieNode> nodes = new ArrayList<TrieNode>(); |
| | | nodes.add(nd); |
| | | allNodeLayers.put(nd.Layer, nodes); |
| | | } else { |
| | | allNodeLayers.get(nd.Layer).add(nd); |
| | | } |
| | | } |
| | | } |
| | | nd.SetResults(i); |
| | | } |
| | | Character z = 0; |
| | | // 第二次关键字 通配符 |
| | | for (int i = 0; i < keywords.size(); i++) { |
| | | String p = keywords.get(i); |
| | | if (p.contains(z.toString()) == false) { |
| | | continue; |
| | | } |
| | | int start = 0; |
| | | while (p.charAt(start) == 0) { // 0 为 通配符 |
| | | start++; |
| | | } |
| | | List<TrieNode> trieNodes = new ArrayList<TrieNode>(); |
| | | trieNodes.add(root); |
| | | |
| | | for (int j = start; j < p.length(); j++) { |
| | | List<TrieNode> newTrieNodes = new ArrayList<TrieNode>(); |
| | | Character c = p.charAt(j); |
| | | if (c == 0) { |
| | | for (TrieNode nd : trieNodes) { |
| | | for (Character key : nd.m_values.keySet()) { |
| | | newTrieNodes.add(nd.m_values.get(key)); |
| | | } |
| | | } |
| | | } else { |
| | | for (TrieNode nd : trieNodes) { |
| | | TrieNode nd2 = nd.Add(c); |
| | | if (nd2.Layer == 0) { |
| | | nd2.Layer = j + 1 - start; |
| | | if (allNodeLayers.containsKey(nd2.Layer) == false) { |
| | | List<TrieNode> nodes = new ArrayList<TrieNode>(); |
| | | nodes.add(nd2); |
| | | allNodeLayers.put(nd2.Layer, nodes); |
| | | } else { |
| | | allNodeLayers.get(nd2.Layer).add(nd2); |
| | | } |
| | | // List<TrieNode> tnodes; |
| | | // if (allNodeLayers.TryGetValue(nd2.Layer, tnodes) == false) { |
| | | // tnodes = new ArrayList<TrieNode>(); |
| | | // allNodeLayers[nd.Layer] = tnodes; |
| | | // } |
| | | // tnodes.add(nd2); |
| | | } |
| | | newTrieNodes.add(nd2); |
| | | } |
| | | } |
| | | trieNodes = newTrieNodes; |
| | | } |
| | | for (TrieNode nd : trieNodes) { |
| | | nd.SetResults(i); |
| | | } |
| | | } |
| | | |
| | | // 添加到 allNode |
| | | List<TrieNode> allNode = new ArrayList<TrieNode>(); |
| | | allNode.add(root); |
| | | for (int i = 0; i < allNodeLayers.size(); i++) { // 注意 这里不能用 keySet() |
| | | List<TrieNode> nodes = allNodeLayers.get(i + 1); |
| | | for (int j = 0; j < nodes.size(); j++) { |
| | | allNode.add(nodes.get(j)); |
| | | } |
| | | } |
| | | allNodeLayers.clear(); |
| | | allNodeLayers = null; |
| | | |
| | | // 第一次 Set Failure |
| | | for (int i = 1; i < allNode.size(); i++) { |
| | | TrieNode nd = allNode.get(i); |
| | | nd.Index = i; |
| | | TrieNode r = nd.Parent.Failure; |
| | | char c = nd.Char; |
| | | while (r != null && !r.m_values.containsKey(c)) |
| | | r = r.Failure; |
| | | if (r == null) |
| | | nd.Failure = root; |
| | | else { |
| | | nd.Failure = r.m_values.get(c); |
| | | for (Integer result : nd.Failure.Results) |
| | | nd.SetResults(result); |
| | | } |
| | | } |
| | | |
| | | // 第二次 Set Failure |
| | | Character zore = 0; |
| | | for (int i = 1; i < allNode.size(); i++) { |
| | | TrieNode nd = allNode.get(i); |
| | | if (nd.Layer == 1) { |
| | | continue; |
| | | } |
| | | |
| | | if (nd.m_values.containsKey(zore)) { |
| | | nd.HasWildcard = true; |
| | | } |
| | | if (nd.Failure.HasWildcard) { |
| | | nd.HasWildcard = true; |
| | | } |
| | | if (nd.Char == 0) { |
| | | nd.IsWildcard = true; |
| | | continue; |
| | | } else if (nd.Parent.IsWildcard) { |
| | | nd.IsWildcard = true; |
| | | nd.WildcardLayer = nd.Parent.WildcardLayer + 1; |
| | | if (nd.Failure != root) { |
| | | if (nd.Failure.Layer <= nd.WildcardLayer) { |
| | | nd.Failure = root; |
| | | } |
| | | } |
| | | continue; |
| | | } |
| | | } |
| | | root.Failure = root; |
| | | |
| | | return allNode; |
| | | } |
| | | |
| | | protected boolean HasMatch(String keyword) { |
| | | for (int i = 0; i < keyword.length(); i++) { |
| | | Character c = keyword.charAt(i); |
| | | if (c == '.' || c == '?' || c == '\\' || c == '[' || c == '(') { |
| | | return true; |
| | | } |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | protected List<String> MatchKeywordBuild(String keyword) throws Exception { |
| | | StringBuilder stringBuilder = new StringBuilder(); |
| | | Map<Integer, List<String>> parameterDict = new Hashtable<Integer, List<String>>(); |
| | | SeparateParameters(keyword, stringBuilder, parameterDict); |
| | | |
| | | if (parameterDict.size() == 0) { |
| | | List<String> al = new ArrayList<String>(); |
| | | al.add(stringBuilder.toString()); |
| | | return al; |
| | | } |
| | | List<String> parameters = new ArrayList<String>(); |
| | | KeywordBuild(parameterDict, 0, parameterDict.keySet().size() - 1, "", parameters); |
| | | String keywordFmt = stringBuilder.toString(); |
| | | List<String> list = new ArrayList<String>(); |
| | | |
| | | String z = ((Character) (char) 0).toString(); |
| | | for (int i = 0; i < parameters.size(); i++) { |
| | | String item = parameters.get(i); |
| | | String[] items = item.split(z); |
| | | Object[] ls = new Object[items.length]; |
| | | for (int j = 0; j < ls.length; j++) { |
| | | ls[j] = items[j]; |
| | | } |
| | | String t = String.format(keywordFmt, ls); |
| | | if (list.contains(t) == false) { |
| | | list.add(t); |
| | | } |
| | | } |
| | | return list; |
| | | } |
| | | |
| | | private void SeparateParameters(String keyword, StringBuilder stringBuilder, |
| | | Map<Integer, List<String>> parameterDict) throws Exception { |
| | | int index = 0; |
| | | int parameterIndex = 0; |
| | | Character zore = 0; |
| | | |
| | | while (index < keyword.length()) { |
| | | Character c = keyword.charAt(index); |
| | | if (c == '.') { |
| | | if (index + 1 < keyword.length() && keyword.charAt(index + 1) == '?') { |
| | | List<String> lt = new ArrayList<String>(); |
| | | lt.add(""); |
| | | lt.add(zore.toString()); |
| | | parameterDict.put(parameterIndex, lt); |
| | | stringBuilder.append("%" + parameterIndex + "$s"); |
| | | parameterIndex++; |
| | | index += 2; |
| | | |
| | | } else { |
| | | stringBuilder.append(((char) 0)); |
| | | index++; |
| | | } |
| | | } else if (c == '\\') { |
| | | if (index + 2 < keyword.length() && keyword.charAt(index + 2) == '?') { |
| | | List<String> lt = new ArrayList<String>(); |
| | | lt.add(""); |
| | | lt.add(((Character) keyword.charAt(index + 1)).toString()); |
| | | parameterDict.put(parameterIndex, lt); |
| | | stringBuilder.append("%" + parameterIndex + "$s"); |
| | | parameterIndex++; |
| | | index += 3; |
| | | } else if (index + 1 < keyword.length()) { |
| | | stringBuilder.append(keyword.charAt(index + 1)); |
| | | index += 2; |
| | | } else { |
| | | throw new Exception("【{keyword}】出错了,最后一位为\\"); |
| | | } |
| | | } else if (c == '[') { |
| | | index++; |
| | | List<String> ps = new ArrayList<String>(); |
| | | while (index < keyword.length()) { |
| | | c = keyword.charAt(index); |
| | | if (c == ']') { |
| | | break; |
| | | } else if (c == '\\') { |
| | | if (index + 1 < keyword.length()) { |
| | | ps.add(((Character) keyword.charAt(index + 1)).toString()); |
| | | index += 2; |
| | | } |
| | | } else { |
| | | ps.add(c.toString()); |
| | | index++; |
| | | } |
| | | } |
| | | if (c != ']') { |
| | | throw new Exception("【{keyword}】出错了,最后一位不为]"); |
| | | } |
| | | if (index + 1 < keyword.length() && keyword.charAt(index + 1) == '?') { |
| | | ps.add(""); |
| | | parameterDict.put(parameterIndex, ps); |
| | | stringBuilder.append("%" + parameterIndex + "$s"); |
| | | parameterIndex++; |
| | | index += 2; |
| | | } else { |
| | | parameterDict.put(parameterIndex, ps); |
| | | stringBuilder.append("%" + parameterIndex + "$s"); |
| | | parameterIndex++; |
| | | index++; |
| | | } |
| | | } else if (c == '(') { |
| | | index++; |
| | | List<String> ps = new ArrayList<String>(); |
| | | String words = ""; |
| | | while (index < keyword.length()) { |
| | | c = keyword.charAt(index); |
| | | if (c == ')') { |
| | | break; |
| | | } else if (c == '|') { |
| | | ps.add(words); |
| | | words = ""; |
| | | index++; |
| | | } else if (c == '\\') { |
| | | if (index + 1 < keyword.length()) { |
| | | words += keyword.charAt(index + 1); |
| | | index += 2; |
| | | } |
| | | } else { |
| | | words += c; |
| | | index++; |
| | | } |
| | | } |
| | | ps.add(words); |
| | | if (c != ')') { |
| | | throw new Exception("【{keyword}】出错了,最后一位不为)"); |
| | | } |
| | | if (index + 1 < keyword.length() && keyword.charAt(index + 1) == '?') { |
| | | ps.add(""); |
| | | parameterDict.put(parameterIndex, ps); |
| | | stringBuilder.append("%" + parameterIndex + "$s"); |
| | | parameterIndex++; |
| | | index += 2; |
| | | } else { |
| | | parameterDict.put(parameterIndex, ps); |
| | | stringBuilder.append("%" + parameterIndex + "$s"); |
| | | parameterIndex++; |
| | | index++; |
| | | } |
| | | } else { |
| | | if (index + 1 < keyword.length() && keyword.charAt(index + 1) == '?') { |
| | | List<String> lt = new ArrayList<String>(); |
| | | lt.add(""); |
| | | lt.add(c.toString()); |
| | | parameterDict.put(parameterIndex, lt); |
| | | stringBuilder.append("%" + parameterIndex + "$s"); |
| | | parameterIndex++; |
| | | index += 2; |
| | | } else { |
| | | if (c == '{') { |
| | | stringBuilder.append("{{"); |
| | | } else if (c == '}') { |
| | | stringBuilder.append("}}"); |
| | | } else { |
| | | stringBuilder.append(c); |
| | | } |
| | | index++; |
| | | } |
| | | } |
| | | } |
| | | |
| | | } |
| | | |
| | | private static void KeywordBuild(Map<Integer, List<String>> parameterDict, int index, int end, String keyword, |
| | | List<String> result) { |
| | | Character span = (char) 1; |
| | | List<String> list = parameterDict.get(index); |
| | | if (index == end) { |
| | | for (int i = 0; i < list.size(); i++) { |
| | | String item = list.get(i); |
| | | result.add((keyword + span + item).substring(1)); |
| | | } |
| | | } else { |
| | | for (int i = 0; i < list.size(); i++) { |
| | | String item = list.get(i); |
| | | KeywordBuild(parameterDict, index + 1, end, keyword + span + item, result); |
| | | } |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * 设置关键字 |
| | | * |
| | | * @param keywords 关键字列表 |
| | | * @throws Exception |
| | | */ |
| | | public void SetKeywords(List<String> keywords) throws Exception { |
| | | _matchKeywords = keywords.toArray(new String[0]); |
| | | List<String> newKeyword = new ArrayList<String>(); |
| | | List<Integer> newKeywordLength = new ArrayList<Integer>(); |
| | | List<Integer> newKeywordIndex = new ArrayList<Integer>(); |
| | | Integer index = 0; |
| | | for (String keyword : keywords) { |
| | | if (HasMatch(keyword) == false) { |
| | | newKeyword.add(keyword); |
| | | newKeywordLength.add(keyword.length()); |
| | | newKeywordIndex.add(index); |
| | | } else { |
| | | List<String> list = MatchKeywordBuild(keyword); |
| | | for (String item : list) { |
| | | newKeyword.add(item); |
| | | newKeywordLength.add(item.length()); |
| | | newKeywordIndex.add(index); |
| | | } |
| | | } |
| | | index++; |
| | | } |
| | | _keywordLength = new int[newKeywordLength.size()]; |
| | | for (int i = 0; i < _keywordLength.length; i++) { |
| | | _keywordLength[i] = newKeywordLength.get(i); |
| | | } |
| | | _keywordIndex = new int[newKeywordIndex.size()]; |
| | | for (int j = 0; j < _keywordIndex.length; j++) { |
| | | _keywordIndex[j] = newKeywordIndex.get(j); |
| | | } |
| | | |
| | | SetKeywords2(newKeyword); |
| | | } |
| | | |
| | | /** |
| | | * Rebuilds the runtime trie from the given keywords. |
| | | * <p> |
| | | * Takes the node list returned by {@code BuildFirstLayerTrieNode} (element 0 is used as the root), |
| | | * creates one {@code TrieNode3} per node and copies into it the node's own edges and results plus |
| | | * those found along its chain of {@code Failure} links. The root's edges are finally published as |
| | | * the char-indexed {@code _first} table. |
| | | * |
| | | * @param keywords keywords to index; the result numbers stored in the nodes are used as positions in this list |
| | | */ |
| | | protected void SetKeywords2(List<String> keywords) { |
| | | List<TrieNode> allNode = BuildFirstLayerTrieNode(keywords); |
| | | TrieNode root = allNode.get(0); |
| | | |
| | | // One TrieNode3 per TrieNode; the Index read from an old node is used below as a position in allNode2. |
| | | List<TrieNode3> allNode2 = new ArrayList<TrieNode3>(); |
| | | for (int i = 0; i < allNode.size(); i++) { |
| | | allNode2.add(new TrieNode3()); |
| | | } |
| | | |
| | | for (int i = 0; i < allNode2.size(); i++) { |
| | | TrieNode oldNode = allNode.get(i); |
| | | TrieNode3 newNode = allNode2.get(i); |
| | | |
| | | // Copy the node's own edges. Key 0 is treated as the wildcard edge and kept in WildcardNode |
| | | // instead of the regular edge table. |
| | | for (Character key : oldNode.m_values.keySet()) { |
| | | int index = oldNode.m_values.get(key).Index; |
| | | if (key == 0) { |
| | | newNode.HasWildcard = true; |
| | | newNode.WildcardNode = allNode2.get(index); |
| | | continue; |
| | | } |
| | | newNode.Add(key, allNode2.get(index)); |
| | | } |
| | | // Copy the node's own results; on a wildcard node only keywords longer than WildcardLayer are kept. |
| | | for (Integer item : oldNode.Results) { |
| | | if (oldNode.IsWildcard) { |
| | | if (keywords.get(item).length() > oldNode.WildcardLayer) { |
| | | newNode.SetResults(item); |
| | | } |
| | | } else { |
| | | newNode.SetResults(item); |
| | | } |
| | | } |
| | | |
| | | // Walk the Failure chain up to (not including) the root and merge what each node on it offers. |
| | | TrieNode failure = oldNode.Failure; |
| | | while (failure != root) { |
| | | // For a wildcard node the walk stops once the chain reaches a layer at or below WildcardLayer. |
| | | if (oldNode.IsWildcard && failure.Layer <= oldNode.WildcardLayer) { |
| | | break; |
| | | } |
| | | for (Character key : failure.m_values.keySet()) { |
| | | int index = failure.m_values.get(key).Index; |
| | | if (key == 0) { |
| | | newNode.HasWildcard = true; |
| | | // A wildcard target that is already set is not overwritten. |
| | | if (newNode.WildcardNode == null) { |
| | | newNode.WildcardNode = allNode2.get(index); |
| | | } |
| | | continue; |
| | | } |
| | | // Edges that are already present take precedence over edges from the failure chain. |
| | | if (newNode.HasKey(key) == false) { |
| | | newNode.Add(key, allNode2.get(index)); |
| | | } |
| | | } |
| | | for (Integer item : failure.Results) { |
| | | if (oldNode.IsWildcard) { |
| | | if (keywords.get(item).length() > oldNode.WildcardLayer) { |
| | | newNode.SetResults(item); |
| | | } |
| | | } else { |
| | | newNode.SetResults(item); |
| | | } |
| | | } |
| | | failure = failure.Failure; |
| | | } |
| | | } |
| | | // The intermediate trie is no longer referenced from here on. |
| | | allNode.clear(); |
| | | allNode = null; |
| | | root = null; |
| | | |
| | | // var root2 = allNode2[0]; |
| | | // First-level lookup table: one slot per char value, filled from the root's edges. |
| | | TrieNode3[] first = new TrieNode3[Character.MAX_VALUE + 1]; |
| | | for (Character key : allNode2.get(0).m_values.keySet()) { |
| | | first[key] = allNode2.get(0).m_values.get(key); |
| | | } |
| | | _first = first; |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import java.util.*; |
| | | |
| | | import static java.util.stream.Collectors.toMap; |
| | | |
| | | public class BaseMatchEx extends BaseMatch { |
| | | protected int[] _dict; |
| | | protected int[] _firstIndex; |
| | | protected int[] _min; |
| | | protected int[] _max; |
| | | |
| | | protected IntDictionary[] _nextIndex; |
| | | protected int[] _wildcard; |
| | | protected int[] _end; |
| | | protected int[] _resultIndex; |
| | | |
| | | |
| | | /** |
| | | * Rebuilds the array-based matcher tables from the given keywords. |
| | | * <p> |
| | | * Steps: build the node list with {@code BuildFirstLayerTrieNode}, derive the character code table |
| | | * {@code _dict} from the nodes' characters, create one {@code TrieNode3Ex} per node with its own and |
| | | * its {@code Failure}-chain edges/results merged in, then flatten those nodes into |
| | | * {@code _firstIndex}, {@code _min}, {@code _max}, {@code _nextIndex}, {@code _wildcard}, |
| | | * {@code _end} and {@code _resultIndex}. |
| | | * |
| | | * @param keywords keywords to index; the result numbers stored in the nodes are used as positions in this list |
| | | */ |
| | | @Override |
| | | protected void SetKeywords2(List<String> keywords) |
| | | { |
| | | List<TrieNode> allNode = BuildFirstLayerTrieNode(keywords); |
| | | TrieNode root = allNode.get(0); |
| | | |
| | | // Collect the character of every node except the root and build _dict from them. |
| | | StringBuilder stringBuilder = new StringBuilder(); |
| | | for (int i = 1; i < allNode.size(); i++) { |
| | | stringBuilder.append(allNode.get(i).Char); |
| | | } |
| | | CreateDict(stringBuilder.toString()); |
| | | stringBuilder = null; |
| | | |
| | | |
| | | // One TrieNode3Ex per TrieNode; each remembers its own list position in Index. |
| | | List<TrieNode3Ex> allNode2 = new ArrayList<TrieNode3Ex>(); |
| | | for (int i = 0; i < allNode.size(); i++) { |
| | | TrieNode3Ex node3=new TrieNode3Ex(); |
| | | node3.Index=i; |
| | | allNode2.add(node3); ; |
| | | } |
| | | |
| | | for (int i = 0; i < allNode2.size(); i++) { |
| | | TrieNode oldNode = allNode.get(i); |
| | | TrieNode3Ex newNode = allNode2.get(i); |
| | | |
| | | // Copy the node's own edges, keyed by the _dict code of the edge character. |
| | | // NOTE(review): code 0 is treated as the wildcard edge, but CreateDict assigns a non-zero code to |
| | | // every character it is given - confirm how the wildcard character ends up with code 0. |
| | | for (Character item : oldNode.m_values.keySet()) { |
| | | int key = _dict[item]; |
| | | int index = oldNode.m_values.get(item).Index; |
| | | if (key == 0) { |
| | | newNode.HasWildcard = true; |
| | | newNode.WildcardNode = allNode2.get(index) ; |
| | | continue; |
| | | } |
| | | newNode.Add((char)key, allNode2.get(index) ); |
| | | } |
| | | // Copy the node's own results; on a wildcard node only keywords longer than WildcardLayer are kept. |
| | | for (int item : oldNode.Results) { |
| | | if (oldNode.IsWildcard) { |
| | | if (keywords.get(item).length() > oldNode.WildcardLayer) { |
| | | newNode.SetResults(item); |
| | | } |
| | | } else { |
| | | newNode.SetResults(item); |
| | | } |
| | | //newNode.SetResults(item); |
| | | } |
| | | |
| | | // Walk the Failure chain up to (not including) the root and merge what each node on it offers. |
| | | TrieNode failure = oldNode.Failure; |
| | | while (failure != root) { |
| | | // For a wildcard node the walk stops once the chain reaches a layer at or below WildcardLayer. |
| | | if (oldNode.IsWildcard && failure.Layer <= oldNode.WildcardLayer) { |
| | | break; |
| | | } |
| | | for (Character item : failure.m_values.keySet()) { |
| | | int key = _dict[item]; |
| | | int index = failure.m_values.get(item).Index; |
| | | if (key == 0) { |
| | | newNode.HasWildcard = true; |
| | | // A wildcard target that is already set is not overwritten. |
| | | if (newNode.WildcardNode == null) { |
| | | newNode.WildcardNode = allNode2.get(index); |
| | | } |
| | | continue; |
| | | } |
| | | // Edges that are already present take precedence over edges from the failure chain. |
| | | if (newNode.HasKey((char)key) == false) { |
| | | newNode.Add((char)key, allNode2.get(index)); |
| | | } |
| | | } |
| | | for (int item : failure.Results) { |
| | | if (oldNode.IsWildcard) { |
| | | if (keywords.get(item).length() > oldNode.WildcardLayer) { |
| | | newNode.SetResults(item); |
| | | } |
| | | } else { |
| | | newNode.SetResults(item); |
| | | } |
| | | } |
| | | failure = failure.Failure; |
| | | } |
| | | } |
| | | // The intermediate trie is no longer referenced from here on. |
| | | allNode.clear(); |
| | | allNode = null; |
| | | root = null; |
| | | |
| | | |
| | | // Flatten the nodes into parallel lists, one entry per node. |
| | | List<Integer> min = new ArrayList<Integer>(); |
| | | List<Integer> max = new ArrayList<Integer>(); |
| | | List<Integer> wildcard = new ArrayList<Integer>(); |
| | | List<Map<Integer, Integer>> nextIndexs = new ArrayList<Map<Integer, Integer>>(); |
| | | // end starts with 0 and gets one running total per node, so the results of node i are |
| | | // resultIndex[end[i]] .. resultIndex[end[i + 1] - 1]. |
| | | List<Integer> end = new ArrayList<Integer>() ; |
| | | end.add(0); |
| | | List<Integer> resultIndex = new ArrayList<Integer>(); |
| | | for (int i = 0; i < allNode2.size(); i++) { |
| | | Map<Integer, Integer> dict = new HashMap<Integer, Integer>(); |
| | | TrieNode3Ex node = allNode2.get(i); |
| | | min.add(node.minflag); |
| | | max.add(node.maxflag); |
| | | |
| | | // Wildcard target as a node index; 0 is stored when the node has no wildcard edge. |
| | | if (node.HasWildcard) { |
| | | wildcard.add(node.WildcardNode.Index); |
| | | } else { |
| | | wildcard.add(0); |
| | | } |
| | | |
| | | // The root (i == 0) gets an empty edge map here; its edges go into the first table below. |
| | | if (i > 0) { |
| | | for (Character item : node.m_values.keySet()) { |
| | | dict.put((Integer)(int)(item) , node.m_values.get(item).Index); |
| | | } |
| | | } |
| | | for (int item : node.Results) { |
| | | resultIndex.add(item); |
| | | } |
| | | end.add(resultIndex.size()); |
| | | nextIndexs.add(dict); |
| | | } |
| | | // First-level table, indexed by the keys of the root's edge table (the codes added above). |
| | | int[] first = new int[Character.MAX_VALUE + 1]; |
| | | for (Character item : allNode2.get(0).m_values.keySet()) { |
| | | first[item] = allNode2.get(0).m_values.get(item).Index; |
| | | } |
| | | |
| | | // Convert the boxed lists into the primitive arrays held by this instance. |
| | | _firstIndex = first; |
| | | _min = new int[min.size()]; |
| | | // min and max receive one entry per node, so min.size() is also the size of max. |
| | | _max = new int[min.size()]; |
| | | for (int i = 0; i < min.size(); i++) { |
| | | _min[i] = (int) (min.get(i)); |
| | | _max[i] = (int) (max.get(i)); |
| | | } |
| | | _nextIndex = new IntDictionary[nextIndexs.size()]; |
| | | for (int i = 0; i < nextIndexs.size(); i++) { |
| | | IntDictionary dictionary = new IntDictionary(); |
| | | dictionary.SetDictionary(nextIndexs.get(i)); |
| | | _nextIndex[i] = dictionary; |
| | | } |
| | | _wildcard= new int[wildcard.size()]; |
| | | for (int i = 0; i < wildcard.size(); i++) { |
| | | _wildcard[i] = (int) (wildcard.get(i)); |
| | | } |
| | | _end = new int[end.size()]; |
| | | for (int i = 0; i < end.size(); i++) { |
| | | _end[i] = (int) (end.get(i)); |
| | | } |
| | | _resultIndex = new int[resultIndex.size()]; |
| | | for (int i = 0; i < resultIndex.size(); i++) { |
| | | _resultIndex[i] = (int) (resultIndex.get(i)); |
| | | } |
| | | allNode2.clear(); |
| | | allNode2 = null; |
| | | } |
| | | |
| | | private int CreateDict(String keywords) { |
| | | Map<Character, Integer> dictionary = new Hashtable<Character, Integer>(); |
| | | for (int i = 0; i < keywords.length(); i++) { |
| | | Character item = keywords.charAt(i); |
| | | if (dictionary.containsKey(item)) { |
| | | dictionary.put(item, dictionary.get(item) + 1); |
| | | } else { |
| | | dictionary.put(item, 1); |
| | | } |
| | | } |
| | | Map<Character, Integer> dictionary2 = dictionary.entrySet().stream() |
| | | .sorted(Collections.reverseOrder(Map.Entry.comparingByValue())) |
| | | .collect(toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e2, LinkedHashMap::new)); |
| | | |
| | | List<Character> list2 = new ArrayList<Character>(); |
| | | for (Character item : dictionary2.keySet()) { |
| | | list2.add(item); |
| | | } |
| | | |
| | | _dict = new int[Character.MAX_VALUE + 1]; |
| | | for (int i = 0; i < list2.size(); i++) { |
| | | _dict[list2.get(i)] = i + 1; |
| | | } |
| | | return dictionary.size(); |
| | | } |
| | | |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import org.springblade.modules.words.WordsSearch; |
| | | import org.springblade.modules.words.WordsSearchResult; |
| | | |
| | | import java.io.IOException; |
| | | import java.util.ArrayList; |
| | | import java.util.HashSet; |
| | | import java.util.List; |
| | | import java.util.Set; |
| | | |
| | | public class BasePinyinMatch { |
| | | |
| | | public class PinyinSearch extends BaseSearch { |
| | | String[][] _keywordPinyins; |
| | | int[] _indexs; |
| | | |
| | | public void SetIndexs(final int[] indexs) { |
| | | _indexs = indexs; |
| | | } |
| | | public void SetIndexs(List<Integer> indexs) { |
| | | _indexs=new int[indexs.size()]; |
| | | for (int i = 0; i < indexs.size(); i++) { |
| | | _indexs[i]=indexs.get(i); |
| | | } |
| | | } |
| | | |
| | | public void SetKeywords2(List<TwoTuple<String, String[]>> keywords) { |
| | | _keywords = new String[keywords.size()]; |
| | | _keywordPinyins = new String[keywords.size()][]; |
| | | for (int i = 0; i < keywords.size(); i++) { |
| | | _keywords[i] = keywords.get(i).Item1; |
| | | _keywordPinyins[i] = keywords.get(i).Item2; |
| | | } |
| | | SetKeywords(); |
| | | } |
| | | |
| | | /** |
| | | * Reports whether any keyword matches somewhere in the text. |
| | | * <p> |
| | | * {@code text} is run through the trie; each keyword ending at the current position is then checked |
| | | * character by character: a one-character pinyin entry in the range 0x3400..0x9fd5 must equal the |
| | | * character of {@code hz} at that position, any other entry must be a prefix of the entry of |
| | | * {@code pinyins} at that position. |
| | | * |
| | | * @param text string that is walked through the trie |
| | | * @param hz string compared against literal-character entries |
| | | * @param pinyins per-position strings compared against pinyin entries |
| | | * @return true as soon as one keyword passes the check, false otherwise |
| | | */ |
| | | public boolean Find(final String text, final String hz, final String[] pinyins) { |
| | | TrieNode2 state = null; |
| | | for (int pos = 0; pos < text.length(); pos++) { |
| | | final Character ch = text.charAt(pos); |
| | | // Follow the current node's edge when it has one, otherwise restart from the first-level table. |
| | | state = (state != null && state.HasKey(ch)) ? state.GetValue(ch) : _first[ch]; |
| | | if (state == null || !state.End) { |
| | | continue; |
| | | } |
| | | candidates: |
| | | for (final int result : state.Results) { |
| | | final String keyword = _keywords[result]; |
| | | final String[] keywordPinyins = _keywordPinyins[result]; |
| | | final int start = pos + 1 - keyword.length(); |
| | | for (int offset = 0; offset < keyword.length(); offset++) { |
| | | final int idx = start + offset; |
| | | final String py = keywordPinyins[offset]; |
| | | final boolean literal = py.length() == 1 && py.charAt(0) >= 0x3400 && py.charAt(0) <= 0x9fd5; |
| | | final boolean matches = literal ? hz.charAt(idx) == py.charAt(0) : pinyins[idx].startsWith(py); |
| | | if (!matches) { |
| | | continue candidates; |
| | | } |
| | | } |
| | | return true; |
| | | } |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | /** |
| | | * Reports whether {@code keysCount} keyword groups match in ascending group order without overlapping. |
| | | * <p> |
| | | * Works like {@code Find}, but a keyword is only considered when its entry in {@code _indexs} equals |
| | | * the number of groups matched so far and when it starts after the end of the previous match. |
| | | * |
| | | * @param text string that is walked through the trie |
| | | * @param hz string compared against literal-character entries |
| | | * @param pinyins per-position strings compared against pinyin entries |
| | | * @param keysCount number of groups that have to match |
| | | * @return true once {@code keysCount} groups have matched, false otherwise |
| | | */ |
| | | public boolean Find2(final String text, final String hz, final String[] pinyins, final int keysCount) { |
| | | int matched = 0; |
| | | int lastEnd = -1; |
| | | TrieNode2 state = null; |
| | | for (int pos = 0; pos < text.length(); pos++) { |
| | | final Character ch = text.charAt(pos); |
| | | // Follow the current node's edge when it has one, otherwise restart from the first-level table. |
| | | state = (state != null && state.HasKey(ch)) ? state.GetValue(ch) : _first[ch]; |
| | | if (state == null || !state.End) { |
| | | continue; |
| | | } |
| | | candidates: |
| | | for (final Integer result : state.Results) { |
| | | // Only keywords of the group that is due next are of interest. |
| | | if (_indexs[result] != matched) { |
| | | continue; |
| | | } |
| | | final String keyword = _keywords[result]; |
| | | final int start = pos + 1 - keyword.length(); |
| | | // Must begin after the position where the previous match ended. |
| | | if (start <= lastEnd) { |
| | | continue; |
| | | } |
| | | final String[] keywordPinyins = _keywordPinyins[result]; |
| | | for (int offset = 0; offset < keyword.length(); offset++) { |
| | | final int idx = start + offset; |
| | | final String py = keywordPinyins[offset]; |
| | | final boolean literal = py.length() == 1 && py.charAt(0) >= 0x3400 && py.charAt(0) <= 0x9fd5; |
| | | final boolean matches = literal ? hz.charAt(idx) == py.charAt(0) : pinyins[idx].startsWith(py); |
| | | if (!matches) { |
| | | continue candidates; |
| | | } |
| | | } |
| | | matched++; |
| | | lastEnd = pos; |
| | | if (matched == keysCount) { |
| | | return true; |
| | | } |
| | | break; |
| | | } |
| | | } |
| | | return false; |
| | | } |
| | | |
| | | } |
| | | |
| | | protected void MergeKeywords(String[] keys, int id, String keyword, List<TwoTuple<String, String[]>> list) |
| | | throws NumberFormatException, IOException { |
| | | if (id >= keys.length) { |
| | | TwoTuple<String, String[]> tuple = new TwoTuple<String, String[]>(keyword, keys); |
| | | list.add(tuple); |
| | | return; |
| | | } |
| | | String key = keys[id]; |
| | | if (key.charAt(0) >= 0x3400 && key.charAt(0) <= 0x9fd5) { |
| | | List<String> all = PinyinDict.GetAllPinyin(key.charAt(0), 0); |
| | | Set<Character> fpy = new HashSet<Character>(); |
| | | for (String item : all) { |
| | | fpy.add(item.charAt(0)); |
| | | } |
| | | for (Character item : fpy) { |
| | | MergeKeywords(keys, id + 1, keyword + item, list); |
| | | } |
| | | } else { |
| | | MergeKeywords(keys, id + 1, keyword + key.charAt(0), list); |
| | | } |
| | | } |
| | | |
| | | protected void MergeKeywords(String[] keys, int id, String keyword, List<TwoTuple<String, String[]>> list, |
| | | int index, List<Integer> indexs) throws NumberFormatException, IOException { |
| | | if (id >= keys.length) { |
| | | TwoTuple<String, String[]> tuple = new TwoTuple<String, String[]>(keyword, keys); |
| | | list.add(tuple); |
| | | indexs.add(index); |
| | | return; |
| | | } |
| | | String key = keys[id]; |
| | | if (key.charAt(0) >= 0x3400 && key.charAt(0) <= 0x9fd5) { |
| | | List<String> all = PinyinDict.GetAllPinyin(key.charAt(0), 0); |
| | | Set<Character> fpy = new HashSet<Character>(); |
| | | for (String item : all) { |
| | | fpy.add(item.charAt(0)); |
| | | } |
| | | for (Character item : fpy) { |
| | | MergeKeywords(keys, id + 1, keyword + item, list, index, indexs); |
| | | } |
| | | } else { |
| | | MergeKeywords(keys, id + 1, keyword + key.charAt(0), list, index, indexs); |
| | | } |
| | | } |
| | | |
| | | protected List<String> SplitKeywords(String key) throws NumberFormatException, IOException { |
| | | InitPinyinSearch(); |
| | | List<TextNode> textNodes = new ArrayList<TextNode>(); |
| | | for (int i = 0; i <= key.length(); i++) { |
| | | textNodes.add(new TextNode()); |
| | | } |
| | | textNodes.get(textNodes.size() - 1).End = true; |
| | | |
| | | for (int i = 0; i < key.length(); i++) { |
| | | TextLine line = new TextLine(); |
| | | line.Next = textNodes.get(i + 1); |
| | | line.Words = ((Character) key.charAt(i)).toString(); |
| | | textNodes.get(i).Children.add(line); |
| | | } |
| | | |
| | | List<WordsSearchResult> all = _wordsSearch.FindAll(key); |
| | | for (WordsSearchResult searchResult : all) { |
| | | TextLine line = new TextLine(); |
| | | line.Next = textNodes.get(searchResult.End + 1); |
| | | line.Words = searchResult.Keyword; |
| | | textNodes.get(searchResult.Start).Children.add(line); |
| | | } |
| | | |
| | | List<String> list = new ArrayList<String>(); |
| | | BuildKsywords(textNodes.get(0), 0, "", list); |
| | | return list; |
| | | } |
| | | |
| | | /** |
| | | * Walks every path from {@code textNode} to an end node and adds the joined segment text of each |
| | | * path to {@code list} once. |
| | | * <p> |
| | | * Each segment is appended behind a separator character with code 0; the leading separator is cut |
| | | * off when the end node is reached. A path without any segment (the start node is already the end |
| | | * node) contributes nothing instead of failing in {@code substring}. |
| | | * |
| | | * @param textNode node to continue from |
| | | * @param id depth of the current node (passed along, not otherwise used) |
| | | * @param keywords segment text accumulated so far, each segment preceded by the separator |
| | | * @param list receives the distinct results |
| | | */ |
| | | private void BuildKsywords(TextNode textNode, int id, String keywords, List<String> list) { |
| | | if (textNode.End) { |
| | | // No segment was collected, so there is no leading separator to strip. |
| | | if (keywords.isEmpty()) { |
| | | return; |
| | | } |
| | | String k = keywords.substring(1); |
| | | if (!list.contains(k)) { |
| | | list.add(k); |
| | | } |
| | | return; |
| | | } |
| | | for (TextLine item : textNode.Children) { |
| | | BuildKsywords(item.Next, id + 1, keywords + (char) 0 + item.Words, list); |
| | | } |
| | | } |
| | | |
| | | /** Node of the graph built by SplitKeywords; one node is created per character boundary of the key. */ |
| | | class TextNode { |
| | | // Set on the last node only; BuildKsywords stops walking when it reaches a node with End set. |
| | | public boolean End; |
| | | // Outgoing edges; each one carries a segment text and points to the node behind that segment. |
| | | public List<TextLine> Children = new ArrayList<TextLine>(); |
| | | } |
| | | |
| | | /** Edge of the graph built by SplitKeywords: a segment of the key and the node that follows it. */ |
| | | class TextLine { |
| | | // Text of the segment (a single character or a keyword reported by the pinyin search). |
| | | public String Words; |
| | | // Node reached after this segment. |
| | | public TextNode Next; |
| | | } |
| | | |
| | | private static WordsSearch _wordsSearch; |
| | | |
| | | /** |
| | | * Creates the shared {@code _wordsSearch} on first use. |
| | | * <p> |
| | | * Its keywords are all non-empty prefixes of the upper-cased entries found at the odd positions of |
| | | * {@code PinyinDict.getPyShow()}, each prefix registered once and in first-seen order. |
| | | * Upper-casing uses {@code Locale.ROOT} so the result does not depend on the default locale |
| | | * (a Turkish default locale would otherwise turn 'i' into a dotted capital I). |
| | | */ |
| | | private void InitPinyinSearch() throws NumberFormatException, IOException { |
| | | if (_wordsSearch != null) { |
| | | return; |
| | | } |
| | | List<String> allPinyins = new ArrayList<String>(); |
| | | // Tracks the prefixes already added so that de-duplication does not rescan the list. |
| | | Set<String> seen = new HashSet<String>(); |
| | | String[] pys = PinyinDict.getPyShow(); |
| | | for (int i = 1; i < pys.length; i += 2) { |
| | | String py = pys[i].toUpperCase(java.util.Locale.ROOT); |
| | | for (int j = 1; j <= py.length(); j++) { |
| | | String key = py.substring(0, j); |
| | | if (seen.add(key)) { |
| | | allPinyins.add(key); |
| | | } |
| | | } |
| | | } |
| | | WordsSearch wordsSearch = new WordsSearch(); |
| | | wordsSearch.SetKeywords(allPinyins); |
| | | _wordsSearch = wordsSearch; |
| | | } |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.Hashtable; |
| | | import java.util.List; |
| | | import java.util.Map; |
| | | |
| | | public class BaseSearch { |
| | | protected TrieNode2[] _first = new TrieNode2[Character.MAX_VALUE + 1]; |
| | | protected String[] _keywords; |
| | | |
| | | |
| | | /** |
| | | * Sets the keywords: stores a copy of the list as an array and rebuilds the trie. |
| | | * |
| | | * @param keywords keyword list |
| | | */ |
| | | public void SetKeywords(List<String> keywords) { |
| | | _keywords = keywords.toArray(new String[0]); |
| | | SetKeywords(); |
| | | } |
| | | |
| | | /** |
| | | * Rebuilds the search trie from {@code _keywords}. |
| | | * <p> |
| | | * Steps: insert every keyword into a {@code TrieNode} trie and group the nodes by depth, compute a |
| | | * {@code Failure} link for every node, convert the nodes to {@code TrieNode2} with the edges and |
| | | * results of their failure chains merged in, and publish the root's edges as the char-indexed |
| | | * {@code _first} table. |
| | | */ |
| | | protected void SetKeywords() { |
| | | TrieNode root = new TrieNode(); |
| | | Map<Integer, List<TrieNode>> allNodeLayers = new Hashtable<Integer, List<TrieNode>>(); |
| | | // Insert each keyword character by character; keyword i is recorded as a result on its last node. |
| | | for (int i = 0; i < _keywords.length; i++) { |
| | | String p = _keywords[i]; |
| | | TrieNode nd = root; |
| | | for (int j = 0; j < p.length(); j++) { |
| | | nd = nd.Add(p.charAt(j)); |
| | | // Layer is still 0 on a node that has not been filed yet: set its depth and file it under that depth. |
| | | if (nd.Layer == 0) { |
| | | nd.Layer = j + 1; |
| | | if (allNodeLayers.containsKey(nd.Layer) == false) { |
| | | List<TrieNode> nodes = new ArrayList<TrieNode>(); |
| | | nodes.add(nd); |
| | | allNodeLayers.put(nd.Layer, nodes); |
| | | } else { |
| | | allNodeLayers.get(nd.Layer).add(nd); |
| | | } |
| | | } |
| | | } |
| | | nd.SetResults(i); |
| | | } |
| | | |
| | | // Flatten to one list: root first, then the layers in increasing depth. |
| | | List<TrieNode> allNode = new ArrayList<TrieNode>(); |
| | | allNode.add(root); |
| | | for (int i = 0; i < allNodeLayers.size(); i++) { // Note: keySet() must not be used here; layers are fetched by number 1..n |
| | | List<TrieNode> nodes = allNodeLayers.get(i + 1); |
| | | for (int j = 0; j < nodes.size(); j++) { |
| | | allNode.add(nodes.get(j)); |
| | | } |
| | | } |
| | | allNodeLayers.clear(); |
| | | allNodeLayers = null; |
| | | |
| | | // Number the nodes and compute their Failure links, shallower nodes before deeper ones. |
| | | for (int i = 1; i < allNode.size(); i++) { |
| | | TrieNode nd = allNode.get(i); |
| | | nd.Index = i; |
| | | // Starting at the parent's Failure, follow Failure links until a node with an edge for this |
| | | // node's character is found or the chain ends in null. |
| | | // NOTE(review): root.Failure is only assigned after this loop, so the chain presumably ends in null at the root - confirm in TrieNode. |
| | | TrieNode r = nd.Parent.Failure; |
| | | Character c = nd.Char; |
| | | while (r != null && !r.m_values.containsKey(c)) |
| | | r = r.Failure; |
| | | if (r == null) |
| | | nd.Failure = root; |
| | | else { |
| | | nd.Failure = r.m_values.get(c); |
| | | // The results of the failure target are added to this node as well. |
| | | for (Integer result : nd.Failure.Results) { |
| | | nd.SetResults(result); |
| | | } |
| | | } |
| | | } |
| | | root.Failure = root; |
| | | |
| | | // One TrieNode2 per TrieNode, at the same list position. |
| | | List<TrieNode2> allNode2 = new ArrayList<TrieNode2>(); |
| | | for (int i = 0; i < allNode.size(); i++) { |
| | | allNode2.add(new TrieNode2()); |
| | | } |
| | | for (int i = 0; i < allNode2.size(); i++) { |
| | | TrieNode oldNode = allNode.get(i); |
| | | TrieNode2 newNode = allNode2.get(i); |
| | | |
| | | // Copy the node's own edges and results. |
| | | for (Character key : oldNode.m_values.keySet()) { |
| | | TrieNode nd = oldNode.m_values.get(key); |
| | | newNode.Add(key, allNode2.get(nd.Index)); |
| | | } |
| | | oldNode.Results.forEach(item -> { |
| | | newNode.SetResults(item); |
| | | }); |
| | | |
| | | // Walk the Failure chain up to (not including) the root; edges already present are kept, |
| | | // results are added. |
| | | oldNode = oldNode.Failure; |
| | | while (oldNode != root) { |
| | | for (Character key : oldNode.m_values.keySet()) { |
| | | TrieNode nd = oldNode.m_values.get(key); |
| | | if (newNode.HasKey(key) == false) { |
| | | newNode.Add(key, allNode2.get(nd.Index)); |
| | | } |
| | | } |
| | | oldNode.Results.forEach(item -> { |
| | | newNode.SetResults(item); |
| | | }); |
| | | oldNode = oldNode.Failure; |
| | | } |
| | | } |
| | | // The intermediate trie is no longer referenced from here on. |
| | | allNode.clear(); |
| | | allNode = null; |
| | | root = null; |
| | | |
| | | // First-level lookup table: one slot per char value, filled from the root's edges. |
| | | TrieNode2[] first = new TrieNode2[Character.MAX_VALUE + 1]; |
| | | TrieNode2 root2 = allNode2.get(0); |
| | | for (Character key : root2.m_values.keySet()) { |
| | | TrieNode2 nd = root2.m_values.get(key); |
| | | first[(int) key] = nd; |
| | | } |
| | | _first = first; |
| | | } |
| | | |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import org.springblade.modules.words.NumHelper; |
| | | |
| | | import java.io.*; |
| | | import java.util.*; |
| | | |
| | | import static java.util.stream.Collectors.toMap; |
| | | |
| | | public class BaseSearchEx { |
| | | protected int[] _dict; |
| | | protected int[] _first; |
| | | protected int[] _min; |
| | | protected int[] _max; |
| | | |
| | | protected IntDictionary[] _nextIndex; |
| | | protected int[] _end; |
| | | protected int[] _resultIndex; |
| | | protected String[] _keywords; |
| | | |
| | | /** |
| | | * Saves the search data to a file. |
| | | * Changed on 2020-08-06 to write keywords as UTF-8, so the output may differ from files written |
| | | * by earlier versions. |
| | | * <p> |
| | | * The file stream is closed even when writing fails. |
| | | * |
| | | * @param filePath path of the file to write |
| | | * @throws IOException if the file cannot be opened or written |
| | | */ |
| | | public void Save(String filePath) throws IOException { |
| | | try (FileOutputStream fs = new FileOutputStream(new File(filePath))) { |
| | | Save(fs); |
| | | } |
| | | } |
| | | |
| | | protected void Save(FileOutputStream bw) throws IOException { |
| | | bw.write(NumHelper.serialize(_keywords.length)); |
| | | for (String item : _keywords) { |
| | | byte[] bytes = item.getBytes("utf-8"); |
| | | bw.write(NumHelper.serialize(bytes.length)); |
| | | bw.write(bytes); |
| | | } |
| | | |
| | | bw.write(NumHelper.serialize(_dict.length)); |
| | | for (int item : _dict) { |
| | | bw.write(NumHelper.serialize(item)); |
| | | } |
| | | |
| | | bw.write(NumHelper.serialize(_first.length)); |
| | | for (int item : _first) { |
| | | bw.write(NumHelper.serialize(item)); |
| | | } |
| | | bw.write(NumHelper.serialize(_min.length)); |
| | | for (int item : _min) { |
| | | bw.write(NumHelper.serialize(item)); |
| | | } |
| | | bw.write(NumHelper.serialize(_max.length)); |
| | | for (int item : _max) { |
| | | bw.write(NumHelper.serialize(item)); |
| | | } |
| | | bw.write(NumHelper.serialize(_end.length)); |
| | | for (int item : _end) { |
| | | bw.write(NumHelper.serialize(item)); |
| | | } |
| | | bw.write(NumHelper.serialize(_resultIndex.length)); |
| | | for (int item : _resultIndex) { |
| | | bw.write(NumHelper.serialize(item)); |
| | | } |
| | | |
| | | bw.write(NumHelper.serialize(_nextIndex.length)); |
| | | for (int i = 0; i < _nextIndex.length; i++) { |
| | | int[] keys = _nextIndex[i].getKeys(); |
| | | bw.write(NumHelper.serialize(keys.length)); |
| | | for (int item : keys) { |
| | | bw.write(NumHelper.serialize(item)); |
| | | } |
| | | |
| | | int[] values = _nextIndex[i].getValues(); |
| | | bw.write(NumHelper.serialize(values.length)); |
| | | for (int item : values) { |
| | | bw.write(NumHelper.serialize(item)); |
| | | } |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * Loads the search data from a file. |
| | | * Changed on 2020-08-06 to read keywords as UTF-8, so loading files written by earlier versions |
| | | * may fail. |
| | | * <p> |
| | | * The file stream is closed even when reading fails. |
| | | * |
| | | * @param filePath path of the file to read |
| | | * @throws FileNotFoundException if the file cannot be opened |
| | | * @throws IOException if reading fails |
| | | */ |
| | | public void Load(String filePath) throws FileNotFoundException, IOException { |
| | | try (InputStream in = new BufferedInputStream(new FileInputStream(new File(filePath)))) { |
| | | Load(in); |
| | | } |
| | | } |
| | | |
| | | public void Load(InputStream br) throws IOException { |
| | | int length = NumHelper.read(br); |
| | | _keywords = new String[length]; |
| | | for (int i = 0; i < length; i++) { |
| | | int l = NumHelper.read(br); |
| | | byte[] bytes = new byte[l]; |
| | | br.read(bytes, 0, l); |
| | | _keywords[i] = new String(bytes,"utf-8"); |
| | | } |
| | | |
| | | length = NumHelper.read(br); |
| | | _dict = new int[length]; |
| | | for (int i = 0; i < length; i++) { |
| | | _dict[i] = NumHelper.read(br); |
| | | } |
| | | |
| | | length = NumHelper.read(br); |
| | | _first = new int[length]; |
| | | for (int i = 0; i < length; i++) { |
| | | _first[i] = NumHelper.read(br); |
| | | } |
| | | |
| | | length = NumHelper.read(br); |
| | | _min = new int[length]; |
| | | for (int i = 0; i < length; i++) { |
| | | _min[i] = NumHelper.read(br); |
| | | } |
| | | |
| | | length = NumHelper.read(br); |
| | | _max = new int[length]; |
| | | for (int i = 0; i < length; i++) { |
| | | _max[i] = NumHelper.read(br); |
| | | } |
| | | |
| | | length = NumHelper.read(br); |
| | | _end = new int[length]; |
| | | for (int i = 0; i < length; i++) { |
| | | _end[i] = NumHelper.read(br); |
| | | } |
| | | |
| | | length = NumHelper.read(br); |
| | | _resultIndex = new int[length]; |
| | | for (int i = 0; i < length; i++) { |
| | | _resultIndex[i] = NumHelper.read(br); |
| | | } |
| | | |
| | | length = NumHelper.read(br); |
| | | _nextIndex = new IntDictionary[length]; |
| | | for (int i = 0; i < length; i++) { |
| | | int l2 = NumHelper.read(br); |
| | | int[] keys = new int[l2]; |
| | | for (int j = 0; j < keys.length; j++) { |
| | | keys[j] = NumHelper.read(br); |
| | | } |
| | | |
| | | l2 = NumHelper.read(br); |
| | | int[] values = new int[l2]; |
| | | for (int j = 0; j < values.length; j++) { |
| | | values[j] = NumHelper.read(br); |
| | | } |
| | | _nextIndex[i] = new IntDictionary(); |
| | | _nextIndex[i].SetDictionary(keys, values); |
| | | } |
| | | } |
| | | |
| | | /** |
| | | * Sets the keywords: stores a copy of the list as an array and rebuilds the search tables. |
| | | * |
| | | * @param keywords keyword list |
| | | */ |
| | | public void SetKeywords(List<String> keywords) { |
| | | final String[] copy = new String[keywords.size()]; |
| | | _keywords = keywords.toArray(copy); |
| | | SetKeywords(); |
| | | } |
| | | |
| | | private void SetKeywords() { |
| | | TrieNode root = new TrieNode(); |
| | | Map<Integer, List<TrieNode>> allNodeLayers = new TreeMap<Integer, List<TrieNode>>(); |
| | | for (int i = 0; i < _keywords.length; i++) { |
| | | String p = _keywords[i]; |
| | | TrieNode nd = root; |
| | | for (int j = 0; j < p.length(); j++) { |
| | | nd = nd.Add(p.charAt(j)); |
| | | if (nd.Layer == 0) { |
| | | nd.Layer = j + 1; |
| | | if (allNodeLayers.containsKey(nd.Layer) == false) { |
| | | List<TrieNode> nodes = new ArrayList<TrieNode>(); |
| | | nodes.add(nd); |
| | | allNodeLayers.put(nd.Layer, nodes); |
| | | } else { |
| | | allNodeLayers.get(nd.Layer).add(nd); |
| | | } |
| | | } |
| | | } |
| | | nd.SetResults(i); |
| | | } |
| | | |
| | | List<TrieNode> allNode = new ArrayList<TrieNode>(); |
| | | allNode.add(root); |
| | | for (int i = 0; i < allNodeLayers.size(); i++) { // 注意 这里不能用 keySet() |
| | | List<TrieNode> nodes = allNodeLayers.get(i + 1); |
| | | for (int j = 0; j < nodes.size(); j++) { |
| | | allNode.add(nodes.get(j)); |
| | | } |
| | | } |
| | | allNodeLayers.clear(); |
| | | allNodeLayers = null; |
| | | |
| | | for (int i = 1; i < allNode.size(); i++) { |
| | | TrieNode nd = allNode.get(i); |
| | | nd.Index = i; |
| | | TrieNode r = nd.Parent.Failure; |
| | | Character c = nd.Char; |
| | | while (r != null && !r.m_values.containsKey(c)) |
| | | r = r.Failure; |
| | | if (r == null) |
| | | nd.Failure = root; |
| | | else { |
| | | nd.Failure = r.m_values.get(c); |
| | | for (Integer result : nd.Failure.Results) { |
| | | nd.SetResults(result); |
| | | } |
| | | } |
| | | } |
| | | root.Failure = root; |
| | | |
| | | StringBuilder stringBuilder = new StringBuilder(); |
| | | for (int i = 1; i < allNode.size(); i++) { |
| | | stringBuilder.append(allNode.get(i).Char); |
| | | } |
| | | CreateDict(stringBuilder.toString()); |
| | | stringBuilder = null; |
| | | |
| | | List<TrieNode2Ex> allNode2 = new ArrayList<TrieNode2Ex>(); |
| | | for (int i = 0; i < allNode.size(); i++) { |
| | | TrieNode2Ex nd = new TrieNode2Ex(); |
| | | nd.Index = i; |
| | | allNode2.add(nd); |
| | | } |
| | | for (int i = 0; i < allNode2.size(); i++) { |
| | | TrieNode oldNode = allNode.get(i); |
| | | TrieNode2Ex newNode = allNode2.get(i); |
| | | |
| | | for (Character key : oldNode.m_values.keySet()) { |
| | | TrieNode nd = oldNode.m_values.get(key); |
| | | newNode.Add(_dict[key], allNode2.get(nd.Index)); |
| | | } |
| | | oldNode.Results.forEach(item -> { |
| | | newNode.SetResults(item); |
| | | }); |
| | | |
| | | oldNode = oldNode.Failure; |
| | | while (oldNode != root) { |
| | | for (Character key : oldNode.m_values.keySet()) { |
| | | if (newNode.HasKey(_dict[key]) == false) { |
| | | TrieNode nd = oldNode.m_values.get(key); |
| | | newNode.Add(_dict[key], allNode2.get(nd.Index)); |
| | | } |
| | | } |
| | | oldNode.Results.forEach(item -> { |
| | | newNode.SetResults(item); |
| | | }); |
| | | oldNode = oldNode.Failure; |
| | | } |
| | | } |
| | | allNode.clear(); |
| | | allNode = null; |
| | | root = null; |
| | | |
| | | List<Integer> min = new ArrayList<Integer>(); |
| | | List<Integer> max = new ArrayList<Integer>(); |
| | | List<Map<Integer, Integer>> nextIndexs = new ArrayList<Map<Integer, Integer>>(); |
| | | List<Integer> end = new ArrayList<Integer>(); |
| | | end.add(0); |
| | | List<Integer> resultIndex = new ArrayList<Integer>(); |
| | | for (int i = 0; i < allNode2.size(); i++) { |
| | | Map<Integer, Integer> dict = new TreeMap<Integer, Integer>(); |
| | | TrieNode2Ex node = allNode2.get(i); |
| | | min.add(node.minflag); |
| | | max.add(node.maxflag); |
| | | |
| | | if (i > 0) { |
| | | for (Integer key : node.m_values.keySet()) { |
| | | dict.put(key, node.m_values.get(key).Index); |
| | | } |
| | | } |
| | | for (int j = 0; j < node.Results.size(); j++) { |
| | | resultIndex.add(node.Results.get(j)); |
| | | } |
| | | end.add(resultIndex.size()); |
| | | nextIndexs.add(dict); |
| | | } |
| | | int[] first = new int[Character.MAX_VALUE + 1]; |
| | | for (Integer key : allNode2.get(0).m_values.keySet()) { |
| | | TrieNode2Ex nd = allNode2.get(0).m_values.get(key); |
| | | first[(int) key] = nd.Index; |
| | | } |
| | | |
| | | _first = first; |
| | | _min = new int[min.size()]; |
| | | _max = new int[min.size()]; |
| | | for (int i = 0; i < min.size(); i++) { |
| | | _min[i] = (int) (min.get(i)); |
| | | _max[i] = (int) (max.get(i)); |
| | | } |
| | | _nextIndex = new IntDictionary[nextIndexs.size()]; |
| | | for (int i = 0; i < nextIndexs.size(); i++) { |
| | | IntDictionary dictionary = new IntDictionary(); |
| | | dictionary.SetDictionary(nextIndexs.get(i)); |
| | | _nextIndex[i] = dictionary; |
| | | } |
| | | _end = new int[end.size()]; |
| | | for (int i = 0; i < end.size(); i++) { |
| | | _end[i] = (int) (end.get(i)); |
| | | } |
| | | _resultIndex = new int[resultIndex.size()]; |
| | | for (int i = 0; i < resultIndex.size(); i++) { |
| | | _resultIndex[i] = (int) (resultIndex.get(i)); |
| | | } |
| | | allNode2.clear(); |
| | | allNode2 = null; |
| | | } |
| | | |
| | | private int CreateDict(String keywords) { |
| | | Map<Character, Integer> dictionary = new Hashtable<Character, Integer>(); |
| | | for (int i = 0; i < keywords.length(); i++) { |
| | | Character item = keywords.charAt(i); |
| | | if (dictionary.containsKey(item)) { |
| | | dictionary.put(item, dictionary.get(item) + 1); |
| | | } else { |
| | | dictionary.put(item, 1); |
| | | } |
| | | } |
| | | Map<Character, Integer> dictionary2 = dictionary.entrySet().stream() |
| | | .sorted(Collections.reverseOrder(Map.Entry.comparingByValue())) |
| | | .collect(toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e2, LinkedHashMap::new)); |
| | | |
| | | List<Character> list2 = new ArrayList<Character>(); |
| | | for (Character item : dictionary2.keySet()) { |
| | | list2.add(item); |
| | | } |
| | | |
| | | _dict = new int[Character.MAX_VALUE + 1]; |
| | | for (int i = 0; i < list2.size(); i++) { |
| | | _dict[list2.get(i)] = i + 1; |
| | | } |
| | | return dictionary.size(); |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import org.springblade.modules.words.NumHelper; |
| | | |
| | | import java.io.*; |
| | | import java.util.*; |
| | | |
| | | import static java.util.stream.Collectors.toMap; |
| | | |
| | | public abstract class BaseSearchEx2 { |
| | | protected String[] _keywords; |
| | | protected int[][] _guides; |
| | | protected int[] _key; |
| | | protected int[] _next; |
| | | protected int[] _check; |
| | | protected int[] _dict; |
| | | |
| | | /**
| | |  * Saves the search data to a file.
| | |  * <p>
| | |  * Changed on 2020-08-06: keywords are written as UTF-8, so the output differs from data
| | |  * saved by earlier versions.
| | |  *
| | |  * @param filePath path of the file to write
| | |  * @throws IOException if the file cannot be opened or written
| | |  */
| | | public void Save(String filePath) throws IOException {
| | |     // try-with-resources closes the stream even when writing fails part-way.
| | |     try (FileOutputStream fs = new FileOutputStream(new File(filePath))) {
| | |         Save(fs);
| | |     }
| | | }
| | | |
| | | /**
| | |  * Writes all tables to the given stream in the order read back by {@code Load}:
| | |  * keywords (each as a length-prefixed UTF-8 byte sequence), guides, key, next, check
| | |  * and dict. Every array is preceded by its length.
| | |  * <p>
| | |  * Output is buffered internally and flushed before returning; the stream itself is
| | |  * left open for the caller to close.
| | |  *
| | |  * @param bw destination stream
| | |  * @throws IOException if writing fails
| | |  */
| | | protected void Save(FileOutputStream bw) throws IOException {
| | |     // Each value is serialized separately; buffering avoids one file write per value.
| | |     OutputStream out = new BufferedOutputStream(bw);
| | |
| | |     out.write(NumHelper.serialize(_keywords.length));
| | |     for (String keyword : _keywords) {
| | |         byte[] bytes = keyword.getBytes("utf-8");
| | |         out.write(NumHelper.serialize(bytes.length));
| | |         out.write(bytes);
| | |     }
| | |
| | |     out.write(NumHelper.serialize(_guides.length));
| | |     for (int[] guide : _guides) {
| | |         writeIntArray(out, guide);
| | |     }
| | |
| | |     writeIntArray(out, _key);
| | |     writeIntArray(out, _next);
| | |     writeIntArray(out, _check);
| | |     writeIntArray(out, _dict);
| | |
| | |     out.flush();
| | | }
| | |
| | | /** Writes the array length followed by every element, each through {@code NumHelper.serialize}. */
| | | private static void writeIntArray(OutputStream out, int[] values) throws IOException {
| | |     out.write(NumHelper.serialize(values.length));
| | |     for (int value : values) {
| | |         out.write(NumHelper.serialize(value));
| | |     }
| | | }
| | | |
| | | /**
| | |  * Loads the search data from a file.
| | |  * <p>
| | |  * Changed on 2020-08-06: keywords are decoded as UTF-8, so loading data saved by
| | |  * earlier versions may fail.
| | |  *
| | |  * @param filePath path of the file to read
| | |  * @throws FileNotFoundException if the file cannot be opened
| | |  * @throws IOException           if reading fails
| | |  */
| | | public void Load(String filePath) throws FileNotFoundException, IOException {
| | |     // try-with-resources closes the stream even when parsing fails part-way.
| | |     try (InputStream in = new BufferedInputStream(new FileInputStream(new File(filePath)))) {
| | |         Load(in);
| | |     }
| | | }
| | | |
| | | /**
| | |  * Reads all tables from the stream, in the order written by {@code Save}: keywords
| | |  * (length-prefixed UTF-8 byte sequences), guides, key, next, check and dict.
| | |  * The stream is not closed.
| | |  *
| | |  * @param br source stream
| | |  * @throws EOFException if the stream ends in the middle of a keyword
| | |  * @throws IOException  if reading fails
| | |  */
| | | public void Load(InputStream br) throws IOException {
| | |     int length = NumHelper.read(br);
| | |     _keywords = new String[length];
| | |     for (int i = 0; i < length; i++) {
| | |         byte[] bytes = new byte[NumHelper.read(br)];
| | |         // A single read() may return fewer bytes than requested; keep reading until full.
| | |         readFully(br, bytes);
| | |         _keywords[i] = new String(bytes, "utf-8");
| | |     }
| | |
| | |     length = NumHelper.read(br);
| | |     _guides = new int[length][];
| | |     for (int i = 0; i < length; i++) {
| | |         _guides[i] = readIntArray(br);
| | |     }
| | |
| | |     _key = readIntArray(br);
| | |     _next = readIntArray(br);
| | |     _check = readIntArray(br);
| | |     _dict = readIntArray(br);
| | | }
| | |
| | | /** Fills the whole buffer from the stream, failing with {@link EOFException} on early end of data. */
| | | private static void readFully(InputStream in, byte[] buffer) throws IOException {
| | |     int filled = 0;
| | |     while (filled < buffer.length) {
| | |         int count = in.read(buffer, filled, buffer.length - filled);
| | |         if (count < 0) {
| | |             throw new EOFException("Unexpected end of stream: needed " + buffer.length
| | |                     + " bytes, got " + filled);
| | |         }
| | |         filled += count;
| | |     }
| | | }
| | |
| | | /** Reads a length followed by that many values, each through {@code NumHelper.read}. */
| | | private static int[] readIntArray(InputStream in) throws IOException {
| | |     int count = NumHelper.read(in);
| | |     int[] values = new int[count];
| | |     for (int i = 0; i < count; i++) {
| | |         values[i] = NumHelper.read(in);
| | |     }
| | |     return values;
| | | }
| | | |
| | | /** |
| | | * 设置关键字 |
| | | * |
| | | * @param keywords |
| | | */ |
| | | public void SetKeywords(List<String> keywords) { |
| | | _keywords = keywords.toArray(new String[0]); |
| | | |
| | | SetKeywords(); |
| | | } |
| | | |
| | | /**
| | |  * Rebuilds the lookup tables from {@code _keywords}.
| | |  * Steps: build a character trie, list its nodes layer by layer, compute a failure link
| | |  * for every node, derive the character codes via {@code CreateDict}, copy the trie into
| | |  * {@code TrieNodeEx} nodes keyed by those codes (including transitions and results
| | |  * reachable through the failure chain), and finally hand the nodes to {@code build}.
| | |  */
| | | private void SetKeywords() {
| | | TrieNode root = new TrieNode();
| | | // Nodes grouped by depth (Layer); the root itself is not stored here.
| | | Map<Integer,List<TrieNode>> allNodeLayers=new TreeMap<Integer,List<TrieNode>>();
| | | for (int i = 0; i < _keywords.length; i++) {
| | | String p = _keywords[i];
| | | TrieNode nd = root;
| | | for (int j = 0; j < p.length(); j++) {
| | | nd = nd.Add(p.charAt(j));
| | | // Layer is still 0 only for a node created just now: register it under its depth.
| | | if (nd.Layer == 0) {
| | | nd.Layer = j + 1;
| | | if(allNodeLayers.containsKey(nd.Layer)==false){
| | | List<TrieNode> nodes=new ArrayList<TrieNode>();
| | | nodes.add(nd);
| | | allNodeLayers.put(nd.Layer, nodes);
| | | }else {
| | | allNodeLayers.get(nd.Layer).add(nd);
| | | } }
| | | }
| | | // The node reached by the last character records the keyword's index.
| | | nd.SetResults(i);
| | | }
| | | 
| | | // Flatten to one list: root first, then layer 1, layer 2, ... so parents precede children.
| | | List<TrieNode> allNode = new ArrayList<TrieNode>();
| | | allNode.add(root);
| | | for (int i = 0; i < allNodeLayers.size(); i++) { // NOTE: keySet() must not be used here
| | | List<TrieNode> nodes = allNodeLayers.get(i+1);
| | | for (int j = 0; j < nodes.size(); j++) {
| | | allNode.add(nodes.get(j));
| | | }
| | | }
| | | allNodeLayers.clear();
| | | allNodeLayers=null;
| | | 
| | | 
| | | // Failure links. root.Failure is still null in this loop, which terminates the walk below.
| | | for (int i = 1; i < allNode.size(); i++) {
| | | TrieNode nd = allNode.get(i);
| | | nd.Index = i;
| | | TrieNode r = nd.Parent.Failure;
| | | Character c = nd.Char;
| | | // Follow the parent's failure chain until a node with a child for this character is found.
| | | while (r != null && !r.m_values.containsKey(c)) r = r.Failure;
| | | if (r == null)
| | | nd.Failure = root;
| | | else {
| | | nd.Failure = r.m_values.get(c);
| | | // Results of the failure target also apply at this node.
| | | for (Integer result : nd.Failure.Results) {
| | | nd.SetResults(result);
| | | }
| | | }
| | | }
| | | // Set only now, so that the chain walks further down stop when they reach the root.
| | | root.Failure = root;
| | | 
| | | 
| | | // Character codes are derived from the characters of all non-root nodes.
| | | StringBuilder stringBuilder = new StringBuilder();
| | | for (int i = 1; i < allNode.size(); i++) {
| | | stringBuilder.append(allNode.get(i).Char);
| | | }
| | | Integer length = CreateDict(stringBuilder.toString());
| | | stringBuilder = null;
| | | 
| | | // One TrieNodeEx per TrieNode, at the same list position.
| | | List<TrieNodeEx> allNode2 = new ArrayList<TrieNodeEx>();
| | | for (int i = 0; i < allNode.size(); i++) {
| | | TrieNodeEx nd = new TrieNodeEx();
| | | nd.Index = i;
| | | allNode2.add(nd);
| | | }
| | | for (int i = 0; i < allNode2.size(); i++) {
| | | TrieNode oldNode = allNode.get(i);
| | | TrieNodeEx newNode = allNode2.get(i);
| | | newNode.Char = _dict[oldNode.Char];
| | | 
| | | // Direct children, keyed by character code instead of character.
| | | for (Character key : oldNode.m_values.keySet()) {
| | | TrieNode nd = oldNode.m_values.get(key);
| | | newNode.Add(_dict[key], allNode2.get(nd.Index));
| | | }
| | | oldNode.Results.forEach(item -> {
| | | newNode.SetResults(item);
| | | });
| | | // Walk the failure chain up to the root, adding transitions this node does not
| | | // have yet (nearer nodes win) together with the results found on the way.
| | | oldNode = oldNode.Failure;
| | | while (oldNode != root) {
| | | for (Character key : oldNode.m_values.keySet()) {
| | | if (newNode.HasKey(_dict[key]) == false) {
| | | TrieNode nd = oldNode.m_values.get(key);
| | | newNode.Add(_dict[key], allNode2.get(nd.Index));
| | | }
| | | }
| | | oldNode.Results.forEach(item -> {
| | | newNode.SetResults(item);
| | | });
| | | oldNode = oldNode.Failure;
| | | }
| | | }
| | | allNode.clear();
| | | allNode = null;
| | | root = null;
| | | 
| | | build(allNode2, length);
| | | }
| | | |
| | | private void build(List<TrieNodeEx> nodes, int length) { |
| | | Integer[] has = new Integer[0x00FFFFFF]; |
| | | boolean[] seats = new boolean[0x00FFFFFF]; |
| | | boolean[] seats2 = new boolean[0x00FFFFFF]; |
| | | Integer start = 1; |
| | | Integer oneStart = 1; |
| | | for (int i = 0; i < nodes.size(); i++) { |
| | | TrieNodeEx node = nodes.get(i); |
| | | node.Rank(oneStart, start, seats, seats2, has); |
| | | } |
| | | Integer maxCount = has.length - 1; |
| | | while (has[maxCount] == null) { |
| | | maxCount--; |
| | | } |
| | | length = maxCount + length + 1; |
| | | |
| | | // length = root.Rank(has) + length + 1; |
| | | _key = new int[length]; |
| | | _next = new int[length]; |
| | | _check = new int[length]; |
| | | List<Integer[]> guides = new ArrayList<Integer[]>(); |
| | | guides.add(new Integer[] { 0 }); |
| | | for (int i = 0; i < length; i++) { |
| | | if (has[i] == null) |
| | | continue; |
| | | TrieNodeEx item = nodes.get(has[i]); |
| | | _key[i] = item.Char; |
| | | _next[i] = item.Next; |
| | | if (item.End) { |
| | | _check[i] = guides.size(); |
| | | Integer[] result = item.Results.toArray(new Integer[0]); |
| | | guides.add(result); |
| | | } |
| | | } |
| | | _guides = new int[guides.size()][]; |
| | | for (int i = 0; i < guides.size(); i++) { |
| | | Integer[] array = guides.get(i); |
| | | _guides[i] = new int[array.length]; |
| | | for (int j = 0; j < array.length; j++) { |
| | | _guides[i][j] = array[j]; |
| | | } |
| | | } |
| | | |
| | | } |
| | | |
| | | private int CreateDict(String keywords) { |
| | | Map<Character, Integer> dictionary = new Hashtable<Character, Integer>(); |
| | | for (int i = 0; i < keywords.length(); i++) { |
| | | Character item = keywords.charAt(i); |
| | | if (dictionary.containsKey(item)) { |
| | | dictionary.put(item, dictionary.get(item) + 1); |
| | | } else { |
| | | dictionary.put(item, 1); |
| | | } |
| | | } |
| | | Map<Character, Integer> dictionary2 = dictionary.entrySet().stream() |
| | | .sorted(Collections.reverseOrder(Map.Entry.comparingByValue())) |
| | | .collect(toMap(Map.Entry::getKey, Map.Entry::getValue, (e1, e2) -> e2, LinkedHashMap::new)); |
| | | |
| | | List<Character> list2 = new ArrayList<Character>(); |
| | | for (Character item : dictionary2.keySet()) { |
| | | list2.add(item); |
| | | } |
| | | |
| | | _dict = new int[Character.MAX_VALUE + 1]; |
| | | for (int i = 0; i < list2.size(); i++) { |
| | | _dict[list2.get(i)] = i + 1; |
| | | } |
| | | return dictionary.size(); |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.List; |
| | | import java.util.Map; |
| | | |
| | | public class IntDictionary { |
| | | private int[] _keys; |
| | | private int[] _values; |
| | | private int last; |
| | | |
| | | public IntDictionary() { |
| | | last = -1; |
| | | } |
| | | |
| | | public int[] getKeys() { |
| | | return _keys; |
| | | } |
| | | |
| | | public int[] getValues() { |
| | | return _values; |
| | | } |
| | | |
| | | public void SetDictionary(Map<Integer, Integer> dict) { |
| | | |
| | | List<Integer> keys = new ArrayList<Integer>(); |
| | | dict.forEach((k, v) -> { |
| | | keys.add((int) k); |
| | | }); |
| | | |
| | | _keys = new int[dict.size()]; |
| | | _values = new int[dict.size()]; |
| | | for (int i = 0; i < keys.size(); i++) { |
| | | _keys[i] = keys.get(i); |
| | | _values[i] = dict.get(_keys[i]); |
| | | } |
| | | last = _keys.length - 1; |
| | | } |
| | | |
| | | public void SetDictionary(int[] keys, int[] values) { |
| | | _keys = keys; |
| | | _values = values; |
| | | last = _keys.length - 1; |
| | | } |
| | | |
| | | public int IndexOf(int key) { |
| | | if (last == -1) { |
| | | return -1; |
| | | } |
| | | if (_keys[0] == key) { |
| | | return 0; |
| | | } |
| | | if (_keys[last] == key) { |
| | | return last; |
| | | } |
| | | |
| | | int left = 0; |
| | | int right = last; |
| | | while (left + 1 < right) { |
| | | int mid = (left + right) >> 1; |
| | | int d = _keys[mid] - key; |
| | | |
| | | if (d == 0) { |
| | | return mid; |
| | | } else if (d > 0) { |
| | | right = mid; |
| | | } else { |
| | | left = mid; |
| | | } |
| | | } |
| | | return -1; |
| | | } |
| | | |
| | | public int GetValue(int index){ |
| | | return _values[index]; |
| | | } |
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import org.springblade.modules.words.WordsSearch; |
| | | import org.springblade.modules.words.WordsSearchResult; |
| | | |
| | | import java.io.BufferedReader; |
| | | import java.io.IOException; |
| | | import java.io.InputStream; |
| | | import java.io.InputStreamReader; |
| | | import java.util.ArrayList; |
| | | import java.util.HashMap; |
| | | import java.util.List; |
| | | import java.util.Map; |
| | | |
| | | public class PinyinDict { |
| | | private static Map<String, Integer[]> _pyName; |
| | | private static String[] _pyShow; |
| | | private static Integer[] _pyIndex; |
| | | private static Integer[] _pyData; |
| | | private static Integer[] _wordPyIndex; |
| | | private static Integer[] _wordPy; |
| | | private static WordsSearch _search; |
| | | |
| | | /**
| | |  * Returns the pinyin display table, loading the index resource on first use.
| | |  *
| | |  * @return the shared {@code _pyShow} array (not a copy)
| | |  * @throws IOException if the resource cannot be read
| | |  */
| | | public static String[] getPyShow() throws NumberFormatException, IOException {
| | |     InitPyIndex();
| | |     final String[] table = _pyShow;
| | |     return table;
| | | }
| | | |
| | | /**
| | |  * Returns one pinyin string per character of {@code text}.
| | |  * <p>
| | |  * Phrases found by the word search are filled in first. A match is used only when it
| | |  * starts after the end of the previously used match. The remaining characters in the
| | |  * range U+3400..U+9FD5 then take the first reading listed for them. Positions without
| | |  * any reading stay {@code null}.
| | |  *
| | |  * @param text the text to convert
| | |  * @param tone offset added to every index into the display table
| | |  * @return an array as long as {@code text}
| | |  * @throws IOException if a resource cannot be read
| | |  */
| | | public static String[] GetPinyinList(String text, int tone) throws NumberFormatException, IOException {
| | |     InitPyIndex();
| | |     InitPyWords();
| | |
| | |     final int length = text.length();
| | |     String[] readings = new String[length];
| | |
| | |     // Pass 1: whole phrases.
| | |     int coveredUpTo = -1;
| | |     for (WordsSearchResult match : _search.FindAll(text)) {
| | |         if (match.Start <= coveredUpTo) {
| | |             continue;
| | |         }
| | |         for (int k = 0; k < match.Keyword.length(); k++) {
| | |             readings[match.Start + k] = _pyShow[_wordPy[k + _wordPyIndex[match.Index]] + tone];
| | |         }
| | |         coveredUpTo = match.End;
| | |     }
| | |
| | |     // Pass 2: single characters not covered by a phrase.
| | |     for (int pos = 0; pos < length; pos++) {
| | |         if (readings[pos] != null) {
| | |             continue;
| | |         }
| | |         char ch = text.charAt(pos);
| | |         if (ch < 0x3400 || ch > 0x9fd5) {
| | |             continue;
| | |         }
| | |         int slot = ch - 0x3400;
| | |         int from = _pyIndex[slot];
| | |         int to = _pyIndex[slot + 1];
| | |         if (to > from) {
| | |             readings[pos] = _pyShow[_pyData[from] + tone];
| | |         }
| | |     }
| | |     return readings;
| | | }
| | | |
| | | /**
| | |  * Converts a text to pinyin, keeping characters that have no reading unchanged.
| | |  *
| | |  * @param text the text to convert
| | |  * @param tone offset added to every index into the display table
| | |  * @return the concatenated pinyin
| | |  * @throws IOException if a resource cannot be read
| | |  */
| | | public static String GetPinyin(String text, int tone) throws NumberFormatException, IOException {
| | |     InitPyIndex();
| | |
| | |     String[] readings = GetPinyinList(text, tone);
| | |     StringBuilder out = new StringBuilder();
| | |     int pos = 0;
| | |     for (String reading : readings) {
| | |         if (reading == null) {
| | |             out.append(text.charAt(pos));
| | |         } else {
| | |             out.append(reading);
| | |         }
| | |         pos++;
| | |     }
| | |     return out.toString();
| | | }
| | | |
| | | /**
| | |  * Replaces every character that has a reading by the first letter of that reading;
| | |  * all other characters are kept.
| | |  *
| | |  * @param text the text to convert
| | |  * @param tone offset added to every index into the display table
| | |  * @return a string of the same length as {@code text}
| | |  * @throws IOException if a resource cannot be read
| | |  */
| | | public static String GetFirstPinyin(String text, int tone) throws NumberFormatException, IOException {
| | |     InitPyIndex();
| | |
| | |     String[] readings = GetPinyinList(text, tone);
| | |     StringBuilder out = new StringBuilder(text);
| | |     for (int pos = 0; pos < readings.length; pos++) {
| | |         if (readings[pos] == null) {
| | |             continue;
| | |         }
| | |         out.setCharAt(pos, readings[pos].charAt(0));
| | |     }
| | |     return out.toString();
| | | }
| | | |
| | | /**
| | |  * Lists all distinct readings of a character, in table order.
| | |  *
| | |  * @param c    the character to look up
| | |  * @param tone offset added to every index into the display table
| | |  * @return the readings; empty when the character is outside U+3400..U+9FD5 or has none
| | |  * @throws IOException if the resource cannot be read
| | |  */
| | | public static List<String> GetAllPinyin(Character c, int tone) throws NumberFormatException, IOException {
| | |     InitPyIndex();
| | |
| | |     List<String> readings = new ArrayList<String>();
| | |     if (c < 0x3400 || c > 0x9fd5) {
| | |         return readings;
| | |     }
| | |     int slot = c - 0x3400;
| | |     int from = _pyIndex[slot];
| | |     int to = _pyIndex[slot + 1];
| | |     for (int i = from; i < to; i++) {
| | |         String reading = _pyShow[_pyData[i] + tone];
| | |         if (!readings.contains(reading)) {
| | |             readings.add(reading);
| | |         }
| | |     }
| | |     return readings;
| | | }
| | | |
| | | /**
| | |  * Returns the first reading listed for a character, without phrase lookup.
| | |  *
| | |  * @param c    the character to look up
| | |  * @param tone offset added to the index into the display table
| | |  * @return the reading, or the character itself as a string when it has none
| | |  * @throws IOException if the resource cannot be read
| | |  */
| | | public static String GetPinyinFast(Character c, int tone) throws NumberFormatException, IOException {
| | |     InitPyIndex();
| | |
| | |     if (c < 0x3400 || c > 0x9fd5) {
| | |         return c.toString();
| | |     }
| | |     int slot = c - 0x3400;
| | |     int from = _pyIndex[slot];
| | |     int to = _pyIndex[slot + 1];
| | |     return to > from ? _pyShow[_pyData[from] + tone] : c.toString();
| | | }
| | | |
| | | public static List<String> GetPinyinForName(String name, int tone) throws NumberFormatException, IOException { |
| | | InitPyName(); |
| | | InitPyIndex(); |
| | | |
| | | List<String> list = new ArrayList<String>(); |
| | | String xing; |
| | | String ming; |
| | | Integer[] indexs; |
| | | if (name.length() > 1) { // 检查复姓 |
| | | xing = name.substring(0, 2); |
| | | if (_pyName.containsKey(xing)) { |
| | | indexs = _pyName.get(xing); |
| | | for (Integer index : indexs) { |
| | | list.add(_pyShow[index + tone]); |
| | | } |
| | | if (name.length() > 2) { |
| | | ming = name.substring(2); |
| | | String[] pys = GetPinyinList(ming, tone); |
| | | for (String py : pys) { |
| | | list.add(py); |
| | | } |
| | | } |
| | | return list; |
| | | } |
| | | } |
| | | xing = name.substring(0, 1); |
| | | if (_pyName.containsKey(xing)) { |
| | | indexs = _pyName.get(xing); |
| | | for (Integer index : indexs) { |
| | | list.add(_pyShow[index + tone]); |
| | | } |
| | | if (name.length() > 1) { |
| | | ming = name.substring(1); |
| | | String[] pys = GetPinyinList(ming, tone); |
| | | for (String py : pys) { |
| | | list.add(py); |
| | | } |
| | | } |
| | | return list; |
| | | } |
| | | String[] pys = GetPinyinList(name, tone); |
| | | for (String py : pys) { |
| | | list.add(py); |
| | | } |
| | | return list; |
| | | } |
| | | private final static Object lockObj = new Object(); |
| | | /**
| | |  * Loads {@code pyIndex.txt} once and fills {@code _pyShow}, {@code _pyData} and
| | |  * {@code _pyIndex}.
| | |  * <p>
| | |  * The first line is the comma-separated display table. Every following line describes
| | |  * one entry: either the literal {@code 0} (nothing listed) or comma-separated
| | |  * hexadecimal indexes. {@code _pyIndex[i]} and {@code _pyIndex[i + 1]} delimit the
| | |  * values of entry {@code i} inside {@code _pyData}.
| | |  * <p>
| | |  * NOTE(review): the guarded fields are not declared {@code volatile}, so the
| | |  * unsynchronized first check is not guaranteed to be safe under the Java memory model.
| | |  *
| | |  * @throws IOException           if the resource is missing or cannot be read
| | |  * @throws NumberFormatException if a line contains a value that is not hexadecimal
| | |  */
| | | private static void InitPyIndex() throws NumberFormatException, IOException {
| | |     if (_pyIndex != null) {
| | |         return;
| | |     }
| | |     synchronized (lockObj) {
| | |         if (_pyIndex != null) {
| | |             return;
| | |         }
| | |         String resourceName = "pyIndex.txt";
| | |         InputStream u1 = WordsSearch.class.getClassLoader().getResourceAsStream(resourceName);
| | |         if (u1 == null) {
| | |             throw new IOException("Resource not found on classpath: " + resourceName);
| | |         }
| | |
| | |         String[] pyShow = null;
| | |         List<Integer> pyIndex = new ArrayList<Integer>();
| | |         pyIndex.add(0);
| | |         List<Integer> pyData = new ArrayList<Integer>();
| | |
| | |         // Decode explicitly as UTF-8 instead of relying on the platform default charset.
| | |         try (BufferedReader br = new BufferedReader(new InputStreamReader(u1, "UTF-8"))) {
| | |             String line;
| | |             while ((line = br.readLine()) != null) {
| | |                 if (pyShow == null) {
| | |                     pyShow = line.split(",");
| | |                     continue;
| | |                 }
| | |                 // Compare by value: a reference comparison with the literal never matches
| | |                 // a line read from the file, so "0" lines were parsed as data.
| | |                 if (!"0".equals(line)) {
| | |                     for (String idx : line.split(",")) {
| | |                         pyData.add(Integer.parseInt(idx, 16));
| | |                     }
| | |                 }
| | |                 pyIndex.add(pyData.size());
| | |             }
| | |         }
| | |
| | |         // Publish only after a complete load; _pyIndex is the "loaded" marker, so it goes last.
| | |         _pyShow = pyShow;
| | |         _pyData = pyData.toArray(new Integer[pyData.size()]);
| | |         _pyIndex = pyIndex.toArray(new Integer[pyIndex.size()]);
| | |     }
| | | }
| | | |
| | | /**
| | |  * Loads {@code pyName.txt} once into {@code _pyName}.
| | |  * <p>
| | |  * Each line is comma-separated: the first field is the map key, the remaining fields
| | |  * are hexadecimal indexes stored as the value.
| | |  * <p>
| | |  * NOTE(review): {@code _pyName} is not declared {@code volatile}, so the unsynchronized
| | |  * first check is not guaranteed to be safe under the Java memory model.
| | |  *
| | |  * @throws IOException           if the resource is missing or cannot be read
| | |  * @throws NumberFormatException if a field is not hexadecimal
| | |  */
| | | private static void InitPyName() throws NumberFormatException, IOException {
| | |     if (_pyName != null) {
| | |         return;
| | |     }
| | |     synchronized (lockObj) {
| | |         if (_pyName != null) {
| | |             return;
| | |         }
| | |         String resourceName = "pyName.txt";
| | |         InputStream u1 = WordsSearch.class.getClassLoader().getResourceAsStream(resourceName);
| | |         if (u1 == null) {
| | |             throw new IOException("Resource not found on classpath: " + resourceName);
| | |         }
| | |
| | |         Map<String, Integer[]> pyName = new HashMap<String, Integer[]>();
| | |         // Decode explicitly as UTF-8 instead of relying on the platform default charset.
| | |         try (BufferedReader br = new BufferedReader(new InputStreamReader(u1, "UTF-8"))) {
| | |             String line;
| | |             while ((line = br.readLine()) != null) {
| | |                 String[] sp = line.split(",");
| | |                 List<Integer> index = new ArrayList<Integer>();
| | |                 for (int i = 1; i < sp.length; i++) {
| | |                     index.add(Integer.parseInt(sp[i], 16));
| | |                 }
| | |                 pyName.put(sp[0], index.toArray(new Integer[index.size()]));
| | |             }
| | |         }
| | |
| | |         _pyName = pyName;
| | |     }
| | | }
| | | |
| | | /**
| | |  * Loads {@code pyWords.txt} once and builds the phrase search.
| | |  * <p>
| | |  * Each line is comma-separated: the first field is a phrase, the remaining fields are
| | |  * hexadecimal indexes. {@code _wordPyIndex[k]} is the offset in {@code _wordPy} where
| | |  * the indexes of phrase {@code k} start.
| | |  * <p>
| | |  * NOTE(review): {@code _search} is not declared {@code volatile}, so the unsynchronized
| | |  * first check is not guaranteed to be safe under the Java memory model.
| | |  *
| | |  * @throws IOException           if the resource is missing or cannot be read
| | |  * @throws NumberFormatException if a field is not hexadecimal
| | |  */
| | | private static void InitPyWords() throws NumberFormatException, IOException {
| | |     if (_search != null) {
| | |         return;
| | |     }
| | |     synchronized (lockObj) {
| | |         if (_search != null) {
| | |             return;
| | |         }
| | |         String resourceName = "pyWords.txt";
| | |         InputStream u1 = WordsSearch.class.getClassLoader().getResourceAsStream(resourceName);
| | |         if (u1 == null) {
| | |             throw new IOException("Resource not found on classpath: " + resourceName);
| | |         }
| | |
| | |         List<String> keywords = new ArrayList<String>();
| | |         List<Integer> wordPyIndex = new ArrayList<Integer>();
| | |         List<Integer> wordPy = new ArrayList<Integer>();
| | |
| | |         // Decode explicitly as UTF-8 instead of relying on the platform default charset.
| | |         try (BufferedReader br = new BufferedReader(new InputStreamReader(u1, "UTF-8"))) {
| | |             String line;
| | |             while ((line = br.readLine()) != null) {
| | |                 String[] sp = line.split(",");
| | |                 keywords.add(sp[0]);
| | |                 wordPyIndex.add(wordPy.size());
| | |                 for (int i = 1; i < sp.length; i++) {
| | |                     wordPy.add(Integer.parseInt(sp[i], 16));
| | |                 }
| | |             }
| | |         }
| | |
| | |         WordsSearch search = new WordsSearch();
| | |         search.SetKeywords(keywords);
| | |         _wordPy = wordPy.toArray(new Integer[wordPy.size()]);
| | |         _wordPyIndex = wordPyIndex.toArray(new Integer[wordPyIndex.size()]);
| | |         // _search is the "loaded" marker, so it is published last.
| | |         _search = search;
| | |     }
| | | }
| | | |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import org.springblade.modules.words.WordsSearch; |
| | | import org.springblade.modules.words.WordsSearchResult; |
| | | |
| | | import java.io.BufferedReader; |
| | | import java.io.IOException; |
| | | import java.io.InputStream; |
| | | import java.io.InputStreamReader; |
| | | import java.util.ArrayList; |
| | | import java.util.HashMap; |
| | | import java.util.List; |
| | | import java.util.Map; |
| | | |
| | | public class Translate { |
| | | private static WordsSearch s2tSearch; |
| | | private static WordsSearch t2sSearch; |
| | | private static WordsSearch t2twSearch; |
| | | private static WordsSearch tw2tSearch; |
| | | private static WordsSearch t2hkSearch; |
| | | private static WordsSearch hk2tSearch; |
| | | |
| | | /** |
| | | * 转繁体中文 |
| | | * |
| | | * @param text |
| | | * @param type 0、繁体中文,1、港澳繁体,2、台湾正体 |
| | | * @return |
| | | * @throws Exception |
| | | */ |
| | | public static String ToTraditionalChinese(String text, final int type) throws Exception { |
| | | if (type > 2 || type < 0) { |
| | | throw new Exception("type 不支持该类型"); |
| | | } |
| | | |
| | | final WordsSearch s2t = GetWordsSearch(true, 0); |
| | | text = TransformationReplace(text, s2t); |
| | | if (type > 0) { |
| | | final WordsSearch t2 = GetWordsSearch(true, type); |
| | | text = TransformationReplace(text, t2); |
| | | } |
| | | return text; |
| | | } |
| | | |
| | | /** |
| | | * 转简体中文 |
| | | * |
| | | * @param text |
| | | * @param srcType 0、繁体中文,1、港澳繁体,2、台湾正体 |
| | | * @return |
| | | * @throws Exception |
| | | */ |
| | | public static String ToSimplifiedChinese(String text, final int srcType) throws Exception { |
| | | if (srcType > 2 || srcType < 0) { |
| | | throw new Exception("srcType 不支持该类型"); |
| | | } |
| | | if (srcType > 0) { |
| | | final WordsSearch t2 = GetWordsSearch(false, srcType); |
| | | text = TransformationReplace(text, t2); |
| | | } |
| | | final WordsSearch s2t = GetWordsSearch(false, 0); |
| | | text = TransformationReplace(text, s2t); |
| | | return text; |
| | | } |
| | | |
| | | /**
| | |  * Clears the cached conversion dictionaries; they are rebuilt on next use.
| | |  */
| | | public static void ClearTranslate() {
| | |     s2tSearch = null;
| | |     t2sSearch = null;
| | |     t2twSearch = tw2tSearch = null;
| | |     t2hkSearch = hk2tSearch = null;
| | | }
| | | |
| | | /**
| | |  * Replaces every matched keyword in {@code text} by its counterpart from
| | |  * {@code wordsSearch._others}.
| | |  * <p>
| | |  * The text is scanned left to right. At each position the match with the largest end is
| | |  * used (the first one found wins a tie) and scanning resumes after it; positions
| | |  * without a match are copied unchanged.
| | |  *
| | |  * @param text        the text to convert
| | |  * @param wordsSearch the search whose keywords are replaced by the {@code _others}
| | |  *                    entry with the same index
| | |  * @return the converted text
| | |  */
| | | private static String TransformationReplace(String text, WordsSearch wordsSearch) {
| | |     List<WordsSearchResult> ts = wordsSearch.FindAll(text);
| | |
| | |     // Index the best match per start position once, instead of rescanning the whole
| | |     // result list for every character of the text.
| | |     Map<Integer, WordsSearchResult> bestByStart = new HashMap<Integer, WordsSearchResult>();
| | |     for (WordsSearchResult candidate : ts) {
| | |         WordsSearchResult current = bestByStart.get(candidate.Start);
| | |         int currentEnd = current == null ? -1 : current.End;
| | |         if (currentEnd < candidate.End) {
| | |             bestByStart.put(candidate.Start, candidate);
| | |         }
| | |     }
| | |
| | |     StringBuilder sb = new StringBuilder();
| | |     int index = 0;
| | |     while (index < text.length()) {
| | |         WordsSearchResult t = bestByStart.get(index);
| | |         if (t == null) {
| | |             sb.append(text.charAt(index));
| | |             index++;
| | |         } else {
| | |             sb.append(wordsSearch._others[t.Index]);
| | |             index = t.End + 1;
| | |         }
| | |     }
| | |     return sb.toString();
| | | }
| | | |
| | | private final static Object lockObj = new Object(); |
| | | /**
| | |  * Returns the cached search for one conversion direction, building it on first use.
| | |  * <p>
| | |  * With {@code s2t} true: 0 uses {@code s2t.dat}, 1 uses {@code t2hk.dat}, 2 uses
| | |  * {@code t2tw.dat}. With {@code s2t} false: 0 uses {@code t2s.dat}, 1 and 2 use
| | |  * {@code t2hk.dat} / {@code t2tw.dat} with keys and values swapped.
| | |  *
| | |  * @param s2t     direction flag, see above
| | |  * @param srcType 0, 1 or 2
| | |  * @return the search, or {@code null} for any other {@code srcType}
| | |  * @throws IOException if a dictionary resource cannot be read
| | |  */
| | | private static WordsSearch GetWordsSearch(Boolean s2t, int srcType) throws IOException {
| | |     if (s2t) {
| | |         switch (srcType) {
| | |             case 0:
| | |                 if (s2tSearch == null) {
| | |                     synchronized (lockObj) {
| | |                         if (s2tSearch == null) {
| | |                             s2tSearch = BuildWordsSearch("s2t.dat", false);
| | |                         }
| | |                     }
| | |                 }
| | |                 return s2tSearch;
| | |             case 1:
| | |                 if (t2hkSearch == null) {
| | |                     synchronized (lockObj) {
| | |                         if (t2hkSearch == null) {
| | |                             t2hkSearch = BuildWordsSearch("t2hk.dat", false);
| | |                         }
| | |                     }
| | |                 }
| | |                 return t2hkSearch;
| | |             case 2:
| | |                 if (t2twSearch == null) {
| | |                     synchronized (lockObj) {
| | |                         if (t2twSearch == null) {
| | |                             t2twSearch = BuildWordsSearch("t2tw.dat", false);
| | |                         }
| | |                     }
| | |                 }
| | |                 return t2twSearch;
| | |             default:
| | |                 return null;
| | |         }
| | |     }
| | |
| | |     switch (srcType) {
| | |         case 0:
| | |             if (t2sSearch == null) {
| | |                 synchronized (lockObj) {
| | |                     if (t2sSearch == null) {
| | |                         t2sSearch = BuildWordsSearch("t2s.dat", false);
| | |                     }
| | |                 }
| | |             }
| | |             return t2sSearch;
| | |         case 1:
| | |             if (hk2tSearch == null) {
| | |                 synchronized (lockObj) {
| | |                     if (hk2tSearch == null) {
| | |                         hk2tSearch = BuildWordsSearch("t2hk.dat", true);
| | |                     }
| | |                 }
| | |             }
| | |             return hk2tSearch;
| | |         case 2:
| | |             if (tw2tSearch == null) {
| | |                 synchronized (lockObj) {
| | |                     if (tw2tSearch == null) {
| | |                         tw2tSearch = BuildWordsSearch("t2tw.dat", true);
| | |                     }
| | |                 }
| | |             }
| | |             return tw2tSearch;
| | |         default:
| | |             return null;
| | |     }
| | | }
| | | |
| | | /**
| | |  * Builds a search from a dictionary resource.
| | |  *
| | |  * @param fileName the dictionary resource name
| | |  * @param reverse  when false, the dictionary keys are the keywords and the values are the
| | |  *                 replacements; when true, the two roles are swapped
| | |  * @return a search whose {@code _others} array holds the replacements, parallel to the
| | |  *         keyword list
| | |  * @throws IOException if the resource cannot be read
| | |  */
| | | private static WordsSearch BuildWordsSearch(String fileName, Boolean reverse) throws IOException {
| | |     Map<String, String> dict = GetTransformationDict(fileName);
| | |
| | |     // Split the entries into two parallel lists.
| | |     List<String> sources = new ArrayList<String>();
| | |     List<String> targets = new ArrayList<String>();
| | |     for (Map.Entry<String, String> entry : dict.entrySet()) {
| | |         sources.add(entry.getKey());
| | |         targets.add(entry.getValue());
| | |     }
| | |
| | |     WordsSearch wordsSearch = new WordsSearch();
| | |     List<String> keywords = reverse ? targets : sources;
| | |     List<String> replacements = reverse ? sources : targets;
| | |     wordsSearch.SetKeywords(keywords);
| | |     wordsSearch._others = replacements.toArray(new String[sources.size()]);
| | |     return wordsSearch;
| | | }
| | | |
| | | /**
| | |  * Reads a tab-separated dictionary resource from the classpath.
| | |  * <p>
| | |  * Each line contributes its first field as key and its second field as value; lines
| | |  * with fewer than two fields are skipped. A later line with the same key overwrites an
| | |  * earlier one.
| | |  *
| | |  * @param fileName the resource name
| | |  * @return the dictionary
| | |  * @throws IOException if the resource is missing or cannot be read
| | |  */
| | | static Map<String, String> GetTransformationDict(String fileName) throws IOException {
| | |     InputStream u1 = WordsSearch.class.getClassLoader().getResourceAsStream(fileName);
| | |     if (u1 == null) {
| | |         throw new IOException("Resource not found on classpath: " + fileName);
| | |     }
| | |
| | |     Map<String, String> dict = new HashMap<String, String>();
| | |     // Decode explicitly as UTF-8 instead of relying on the platform default charset;
| | |     // try-with-resources closes the reader even when reading fails.
| | |     try (BufferedReader br = new BufferedReader(new InputStreamReader(u1, "UTF-8"))) {
| | |         String line;
| | |         while ((line = br.readLine()) != null) {
| | |             String[] ss = line.split("\t");
| | |             if (ss.length < 2) {
| | |                 continue;
| | |             }
| | |             dict.put(ss[0], ss[1]);
| | |         }
| | |     }
| | |     return dict;
| | | }
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.HashMap; |
| | | import java.util.List; |
| | | |
| | | public class TrieNode implements Comparable<TrieNode> { |
| | | |
| | | public int Index; |
| | | public int Layer; |
| | | public boolean End; |
| | | public char Char; |
| | | public List<Integer> Results; |
| | | public HashMap<Character, TrieNode> m_values; |
| | | public TrieNode Failure; |
| | | public TrieNode Parent; |
| | | public boolean IsWildcard; |
| | | public int WildcardLayer; |
| | | public boolean HasWildcard; |
| | | |
| | | |
| | | public TrieNode() { |
| | | m_values = new HashMap<Character, TrieNode>(); |
| | | Results = new ArrayList<Integer>(); |
| | | } |
| | | |
| | | public TrieNode Add(final Character c) { |
| | | if (m_values.containsKey(c)) { |
| | | return m_values.get(c); |
| | | } |
| | | final TrieNode node = new TrieNode(); |
| | | node.Parent = this; |
| | | node.Char = c; |
| | | m_values.put(c, node); |
| | | return node; |
| | | } |
| | | |
| | | public void SetResults(final Integer index) { |
| | | if (End == false) { |
| | | End = true; |
| | | } |
| | | if (Results.contains(index) == false) { |
| | | Results.add(index); |
| | | } |
| | | } |
| | | |
| | | @Override |
| | | public int compareTo(final TrieNode o) { |
| | | return this.Layer - o.Layer ; |
| | | } |
| | | } |
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.HashMap; |
| | | import java.util.List; |
| | | |
| | | |
/**
 * Trie node with character-keyed children that remembers the smallest and
 * largest key added so far, so that {@link #HasKey(char)} can reject
 * out-of-range characters without touching the map.
 */
public class TrieNode2 {
    /** Becomes {@code true} once {@link #SetResults(Integer)} has been called. */
    public boolean End;
    /** Distinct result indexes, in the order they were first recorded. */
    public List<Integer> Results = new ArrayList<>();
    /** Child nodes keyed by character. */
    public HashMap<Character, TrieNode2> m_values = new HashMap<>();
    /** Smallest key passed to {@link #Add}; {@code Integer.MAX_VALUE} until the first add. */
    private int minflag = Integer.MAX_VALUE;
    /** Largest key passed to {@link #Add}; {@code 0} until the first add. */
    private int maxflag = 0;

    /** Creates a node with no children and no results. */
    public TrieNode2() {
    }

    /**
     * Registers (or replaces) the child stored under {@code c} and widens the
     * tracked key range to include {@code c}.
     *
     * @param c     key character
     * @param node3 child node to store
     */
    public void Add(final char c, final TrieNode2 node3) {
        minflag = Math.min(minflag, c);
        maxflag = Math.max(maxflag, c);
        m_values.put(c, node3);
    }

    /**
     * Flags this node as an end node and stores {@code index} unless it is
     * already present.
     *
     * @param index result index to record
     */
    public void SetResults(final Integer index) {
        End = true;
        if (!Results.contains(index)) {
            Results.add(index);
        }
    }

    /**
     * Tells whether a child is stored under {@code c}.
     *
     * @param c key character
     * @return {@code true} when {@code c} lies inside the tracked key range and
     *         the map contains it
     */
    public boolean HasKey(final char c) {
        final boolean insideRange = c >= minflag && c <= maxflag;
        return insideRange && m_values.containsKey(c);
    }

    /**
     * Looks up the child stored under {@code c}.
     *
     * @param c key character
     * @return the child node, or {@code null} when none is stored
     */
    public TrieNode2 GetValue(final char c) {
        return m_values.get(c);
    }
}
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.HashMap; |
| | | import java.util.List; |
| | | |
/**
 * Trie node with integer-keyed children. The smallest and largest key added
 * so far are tracked in {@link #minflag} / {@link #maxflag} and used by
 * {@link #HasKey(int)} as a cheap range pre-check.
 */
public class TrieNode2Ex {
    /** Not written by this class; assigned by whoever builds the trie. */
    public int Index;
    /** Becomes {@code true} once {@link #SetResults(int)} has been called. */
    public boolean End;
    /** Distinct result indexes, in the order they were first recorded. */
    public List<Integer> Results = new ArrayList<>();
    /** Child nodes keyed by integer code. */
    public HashMap<Integer, TrieNode2Ex> m_values = new HashMap<>();
    /** Smallest key passed to {@link #Add}; {@code Integer.MAX_VALUE} until the first add. */
    public int minflag = Integer.MAX_VALUE;
    /** Largest key passed to {@link #Add}; {@code 0} until the first add. */
    public int maxflag = 0;

    /** Creates a node with no children and no results. */
    public TrieNode2Ex() {
    }

    /**
     * Registers (or replaces) the child stored under {@code c} and widens the
     * tracked key range to include {@code c}.
     *
     * @param c     key code
     * @param node3 child node to store
     */
    public void Add(final int c, final TrieNode2Ex node3) {
        minflag = Math.min(minflag, c);
        maxflag = Math.max(maxflag, c);
        m_values.put(c, node3);
    }

    /**
     * Flags this node as an end node and stores {@code index} unless it is
     * already present.
     *
     * @param index result index to record
     */
    public void SetResults(final int index) {
        End = true;
        final Integer boxed = index;
        if (!Results.contains(boxed)) {
            Results.add(boxed);
        }
    }

    /**
     * Tells whether a child is stored under {@code c}.
     *
     * @param c key code
     * @return {@code true} when {@code c} lies inside the tracked key range and
     *         the map contains it
     */
    public boolean HasKey(final int c) {
        final boolean insideRange = c >= minflag && c <= maxflag;
        return insideRange && m_values.containsKey(c);
    }

    /**
     * Looks up the child stored under {@code c}.
     *
     * @param c key code
     * @return the child node, or {@code null} when none is stored
     */
    public TrieNode2Ex GetValue(final int c) {
        return m_values.get(c);
    }

}
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.HashMap; |
| | | import java.util.List; |
| | | |
/**
 * Trie node with character-keyed children plus wildcard bookkeeping fields.
 * The smallest and largest key added so far are tracked so that
 * {@link #HasKey(char)} can reject out-of-range characters cheaply.
 */
public class TrieNode3 {
    /** Becomes {@code true} once {@link #SetResults(int)} has been called. */
    public boolean End;
    /** Not written by this class; assigned by whoever builds the trie. */
    public boolean HasWildcard;
    /** Distinct result indexes, in the order they were first recorded. */
    public List<Integer> Results = new ArrayList<>();
    /** Child nodes keyed by character. */
    public HashMap<Character, TrieNode3> m_values = new HashMap<>();
    /** Smallest key passed to {@link #Add}; {@code Integer.MAX_VALUE} until the first add. */
    private int minflag = Integer.MAX_VALUE;
    /** Largest key passed to {@link #Add}; {@code 0} until the first add. */
    private int maxflag = 0;
    /** Not written by this class; assigned by whoever builds the trie. */
    public TrieNode3 WildcardNode;

    /** Creates a node with no children and no results. */
    public TrieNode3() {
    }

    /**
     * Registers (or replaces) the child stored under {@code c} and widens the
     * tracked key range to include {@code c}.
     *
     * @param c     key character
     * @param node3 child node to store
     */
    public void Add(final char c, final TrieNode3 node3) {
        minflag = Math.min(minflag, c);
        maxflag = Math.max(maxflag, c);
        m_values.put(c, node3);
    }

    /**
     * Flags this node as an end node and stores {@code index} unless it is
     * already present.
     *
     * @param index result index to record
     */
    public void SetResults(final int index) {
        End = true;
        final Integer boxed = index;
        if (!Results.contains(boxed)) {
            Results.add(boxed);
        }
    }

    /**
     * Tells whether a child is stored under {@code c}.
     *
     * @param c key character
     * @return {@code true} when {@code c} lies inside the tracked key range and
     *         the map contains it
     */
    public boolean HasKey(final char c) {
        final boolean insideRange = c >= minflag && c <= maxflag;
        return insideRange && m_values.containsKey(c);
    }

    /**
     * Looks up the child stored under {@code c}.
     *
     * @param c key character
     * @return the child node, or {@code null} when none is stored
     */
    public TrieNode3 GetValue(final char c) {
        return m_values.get(c);
    }
}
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.HashMap; |
| | | import java.util.List; |
| | | |
/**
 * Trie node with character-keyed children, an index slot and wildcard
 * bookkeeping fields. The smallest and largest key added so far are exposed
 * in {@link #minflag} / {@link #maxflag} and used by {@link #HasKey(char)} as
 * a cheap range pre-check.
 */
public class TrieNode3Ex {
    /** Not written by this class; assigned by whoever builds the trie. */
    public int Index;
    /** Becomes {@code true} once {@link #SetResults(int)} has been called. */
    public boolean End;
    /** Not written by this class; assigned by whoever builds the trie. */
    public boolean HasWildcard;
    /** Distinct result indexes, in the order they were first recorded. */
    public List<Integer> Results = new ArrayList<>();
    /** Child nodes keyed by character. */
    public HashMap<Character, TrieNode3Ex> m_values = new HashMap<>();
    /** Smallest key passed to {@link #Add}; {@code Integer.MAX_VALUE} until the first add. */
    public int minflag = Integer.MAX_VALUE;
    /** Largest key passed to {@link #Add}; {@code 0} until the first add. */
    public int maxflag = 0;
    /** Not written by this class; assigned by whoever builds the trie. */
    public TrieNode3Ex WildcardNode;

    /** Creates a node with no children and no results. */
    public TrieNode3Ex() {
    }

    /**
     * Registers (or replaces) the child stored under {@code c} and widens the
     * tracked key range to include {@code c}.
     *
     * @param c     key character
     * @param node3 child node to store
     */
    public void Add(final char c, final TrieNode3Ex node3) {
        minflag = Math.min(minflag, c);
        maxflag = Math.max(maxflag, c);
        m_values.put(c, node3);
    }

    /**
     * Flags this node as an end node and stores {@code index} unless it is
     * already present.
     *
     * @param index result index to record
     */
    public void SetResults(final int index) {
        End = true;
        final Integer boxed = index;
        if (!Results.contains(boxed)) {
            Results.add(boxed);
        }
    }

    /**
     * Tells whether a child is stored under {@code c}.
     *
     * @param c key character
     * @return {@code true} when {@code c} lies inside the tracked key range and
     *         the map contains it
     */
    public boolean HasKey(final char c) {
        final boolean insideRange = c >= minflag && c <= maxflag;
        return insideRange && m_values.containsKey(c);
    }

    /**
     * Looks up the child stored under {@code c}.
     *
     * @param c key character
     * @return the child node, or {@code null} when none is stored
     */
    public TrieNode3Ex GetValue(final char c) {
        return m_values.get(c);
    }
}
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
| | | import java.util.ArrayList; |
| | | import java.util.Hashtable; |
| | | import java.util.List; |
| | | import java.util.Map; |
| | | |
/**
 * Trie node with integer-keyed children that can place its children into a
 * shared slot array.
 *
 * <p>{@link #Rank} picks a base offset ({@link #Next}) such that, for every
 * child key {@code k}, slot {@code has[Next + k]} is free, and then writes the
 * child's {@link #Index} into that slot.
 */
public class TrieNodeEx {
    /** Not written by this class; assigned by whoever builds the trie. */
    public Integer Char;
    /** Becomes {@code true} once {@link #SetResults(Integer)} has been called. */
    public boolean End;
    /** Value written into the slot array for this node by its parent's placement. */
    public Integer Index;
    /** Distinct results, in the order they were first recorded. */
    public List<Integer> Results;
    /** Child nodes keyed by integer code. */
    public Map<Integer, TrieNodeEx> m_values;
    // Primitive on purpose: as boxed Integers, "minflag == maxflag" compared
    // references and was false for equal values outside the Integer cache.
    private int minflag = Integer.MAX_VALUE;
    private int maxflag = 0;
    /** Base offset chosen by the last successful placement. */
    public int Next;

    /** Creates a node with no children and no results. */
    public TrieNodeEx() {
        m_values = new Hashtable<Integer, TrieNodeEx>();
        Results = new ArrayList<Integer>();
    }

    /**
     * Registers (or replaces) the child stored under {@code c} and widens the
     * tracked key range to include {@code c}.
     *
     * @param c     key code
     * @param node3 child node to store
     */
    public void Add(int c, TrieNodeEx node3) {
        if (minflag > c) {
            minflag = c;
        }
        if (maxflag < c) {
            maxflag = c;
        }
        m_values.put(c, node3);
    }

    /**
     * Flags this node as an end node and stores {@code text} unless it is
     * already present.
     *
     * @param text result value to record
     */
    public void SetResults(Integer text) {
        End = true;
        if (!Results.contains(text)) {
            Results.add(text);
        }
    }

    /**
     * Tells whether a child is stored under {@code c}.
     *
     * @param c key code; must not be {@code null}
     * @return {@code true} when {@code c} lies inside the tracked key range and
     *         the map contains it
     */
    public boolean HasKey(Integer c) {
        if (minflag <= c && maxflag >= c) {
            return m_values.containsKey(c);
        }
        return false;
    }

    /**
     * Places this node's children into {@code has}.
     *
     * <p>Does nothing when the largest key is {@code 0}. A node whose keys all
     * share one value is placed starting from {@code oneStart}; otherwise the
     * search starts from {@code start}. On success {@link #Next} holds the chosen
     * base, {@code seats[Next]} is {@code true} and {@code has[Next + key]} holds
     * each child's {@link #Index}. When no free position is found nothing is
     * written.
     *
     * <p>Java passes the boxed arguments by value, so the advanced start
     * positions computed here are not visible to the caller.
     *
     * @param oneStart first slot to consider for a single-key node
     * @param start    first slot to consider for a multi-key node
     * @param seats    bases already taken by other nodes; updated on success
     * @param seats2   scratch flags for positions of the largest key that would
     *                 make another key collide; cleared again before returning
     * @param has      shared slot array; {@code null} entries are free
     */
    public void Rank(Integer oneStart, Integer start, boolean[] seats, boolean[] seats2, Integer[] has) {
        if (maxflag == 0) {
            return;
        }
        if (minflag == maxflag) {
            RankOne(oneStart, seats, has);
            return;
        }

        // Descending order puts the largest key first. The offsets below are
        // "largest key minus every OTHER key", so skipping element 0 is only
        // correct when element 0 is the largest; Hashtable iteration order does
        // not guarantee that.
        List<Integer> keys = new ArrayList<Integer>(m_values.keySet());
        keys.sort((a, b) -> Integer.compare(b, a));

        int length = keys.size() - 1;
        int[] moves = new int[length];
        for (int i = 1; i < keys.size(); i++) {
            moves[i - 1] = maxflag - keys.get(i);
        }

        int from = start;
        while (has[from] != null) {
            from++;
        }
        int s = from < minflag ? minflag : from;

        // Occupied slots just before the first candidate rule out the candidates
        // that would drop a smaller key onto them.
        for (int i = s; i < s + (maxflag - minflag); i++) {
            if (has[i] != null) {
                markBlocked(i, moves, seats2);
            }
        }

        int max = 0;
        for (int i = s + (maxflag - minflag); i < has.length; i++) {
            if (has[i] == null) {
                if (seats2[i]) {
                    continue;
                }
                int next = i - maxflag;
                if (seats[next]) {
                    continue;
                }
                SetSeats(next, seats, has);
                max = i;
                break;
            } else {
                markBlocked(i, moves, seats2);
            }
        }

        // Reset the scratch flags over the same range the original code clears.
        from += keys.size() / 2;
        for (int p = from; p < max + maxflag - from + 1; p++) {
            seats2[p] = false;
        }
    }

    /** Flags every candidate position that would put a non-largest key onto occupied slot {@code i}. */
    private void markBlocked(int i, int[] moves, boolean[] seats2) {
        for (int move : moves) {
            seats2[i + move] = true;
        }
    }

    /** Places a node whose keys all equal {@code minflag} at the first free, unclaimed position. */
    private void RankOne(Integer start, boolean[] seats, Integer[] has) {
        int from = start;
        while (has[from] != null) {
            from++;
        }
        int s = from < minflag ? minflag : from;

        for (int i = s; i < has.length; i++) {
            if (has[i] == null) {
                int next = i - minflag;
                if (seats[next]) {
                    continue;
                }
                SetSeats(next, seats, has);
                break;
            }
        }
    }

    /** Records {@code next} as the chosen base and writes every child's Index into its slot. */
    private void SetSeats(int next, boolean[] seats, Integer[] has) {
        Next = next;
        seats[next] = true;

        m_values.forEach((key, value) -> {
            int position = next + key;
            has[position] = value.Index;
        });
    }

}
| New file |
| | |
| | | package org.springblade.modules.words.internals; |
| | | |
/**
 * Minimal mutable pair of two independently typed values.
 *
 * @param <A> type of the first element
 * @param <B> type of the second element
 */
public class TwoTuple<A, B> {
    /** First element of the pair. */
    public A Item1;
    /** Second element of the pair. */
    public B Item2;

    /**
     * Creates a pair holding the two given values.
     *
     * @param item1 value stored in {@link #Item1}
     * @param item2 value stored in {@link #Item2}
     */
    public TwoTuple(A item1, B item2) {
        Item1 = item1;
        Item2 = item2;
    }
}
| | |
| | | |
| | | #ftp 设置 |
| | | ftp: |
| | | sqlConnect: jdbc:mysql://localhost:2083/zhbaw?useSSL=false&useUnicode=true&characterEncoding=utf-8&zeroDateTimeBehavior=convertToNull&transformedBitIsBoolean=true&serverTimezone=GMT%2B8&nullCatalogMeansCurrent=true&allowPublicKeyRetrieval=true |
| | | sqlConnect: jdbc:mysql://localhost:2083/qfqkpublic?useSSL=false&useUnicode=true&characterEncoding=utf-8&zeroDateTimeBehavior=convertToNull&transformedBitIsBoolean=true&serverTimezone=GMT%2B8&nullCatalogMeansCurrent=true&allowPublicKeyRetrieval=true |
| | | ftpHost: 172.19.1.30 |
| | | ftpPort: 21 |
| | | ftpUserName: yly |