99public class TrieTree {
1010 private final TrieNode root ;
1111
12+ // 静态成员,预编译的正则表达式
13+ private static final Pattern INVALID_CHAR_PATTERN = Pattern .compile ("[^a-zA-Z0-9\u4E00 -\u9FA5 ]" );
14+
/**
 * Creates a trie that consists of a single, freshly allocated root node.
 */
public TrieTree() {
    this.root = new TrieNode();
}
1519
20+ // 插入一个新的关键词到Trie树中
1621 public void insert (String word ) {
1722 TrieNode node = root ;
1823 for (char c : word .toCharArray ()) {
19- node .children .putIfAbsent (c , new TrieNode ());
20- node = node .children .get (c );
24+ node .children .putIfAbsent (c , new TrieNode ()); // 如果该字符在当前节点的子节点中不存在,则创建一个新的子节点
25+ node = node .children .get (c ); // 移动到下一个子节点
2126 }
22- node .end = true ;
27+ node .end = true ; // 标记最后一个字符的节点为结束节点,表示一个完整的关键词
2328 }
2429
30+ // 构建AC自动机的失效链接
2531 public void buildFailureNode () {
2632 Queue <TrieNode > queue = new LinkedList <>();
2733 for (TrieNode child : root .children .values ()) {
28- child .fail = root ;
34+ child .fail = root ; // 根节点的子节点的失效链接都指向根节点
2935 queue .add (child );
3036 }
3137 while (!queue .isEmpty ()) {
@@ -35,58 +41,61 @@ public void buildFailureNode() {
3541 queue .add (child );
3642 TrieNode failNode = current .fail ;
3743 while (failNode != null && !failNode .children .containsKey (c ))
38- failNode = failNode .fail ;
39- child .fail = failNode != null ? failNode .children .get (c ) : root ;
44+ failNode = failNode .fail ; // 寻找失效链接的节点
45+ child .fail = failNode != null ? failNode .children .get (c ) : root ; // 如果找到了失效链接的节点,则指向该节点的对应子节点,否则指向根节点
4046 }
4147 }
4248 }
4349
50+ // 检查文本中是否存在关键词
4451 public boolean checkText (String text ) {
4552 TrieNode current = root ;
4653 for (char c : text .toCharArray ()) {
47- if (this . isInvalidChar (c )) continue ;
54+ if (isInvalidChar (c )) continue ; // 如果字符无效,则跳过
4855 while (current != null && !current .children .containsKey (c ))
49- current = current .fail ;
56+ current = current .fail ; // 如果当前节点的子节点中不存在该字符,则跟随失效链接向上查找
5057 if (current == null ) {
51- current = root ;
58+ current = root ; // 如果没有找到,则回到根节点并继续查找
5259 continue ;
5360 }
54- current = current .children .get (c );
55- if (current .end ) return true ;
61+ current = current .children .get (c ); // 如果找到了,则转到下一个子节点
62+ if (current .end ) return true ; // 如果找到了一个关键词的结束节点,则返回true
5663 }
57- return false ;
64+ return false ; // 如果没有找到任何关键词,则返回false
5865 }
5966
67+ // 检查文本中的关键词数量
6068 public int checkTextWithCount (String text ) {
6169 Set <TrieNode > nodes = new HashSet <>();
6270 int count = 0 ;
6371 TrieNode current = root ;
6472 for (char c : text .toCharArray ()) {
65- if (this . isInvalidChar (c )) continue ;
73+ if (isInvalidChar (c )) continue ; // 无效字符直接跳过
6674 while (current != null && !current .children .containsKey (c ))
67- current = current .fail ;
75+ current = current .fail ; // 如果当前节点的子节点中不存在该字符,则跟随失效链接向上查找
6876 if (current == null ) {
69- current = root ;
77+ current = root ; // 如果没有找到,则回到根节点并继续查找
7078 continue ;
7179 }
72- current = current .children .get (c );
80+ current = current .children .get (c ); // 如果找到了,则转到下一个子节点
7381 TrieNode tmp = current ;
7482 while (tmp != null ) {
7583 if (tmp .end && !nodes .contains (tmp )) {
76- nodes .add (tmp );
84+ nodes .add (tmp ); // 如果找到了一个关键词的结束节点,并且该节点还没有被计数过,则计数加1
7785 count ++;
7886 }
79- tmp = tmp .fail ;
87+ tmp = tmp .fail ; // 向上跟随失效链接查找其他可能的关键词
8088 }
8189 }
82- return count ;
90+ return count ; // 返回找到的关键词数量
8391 }
8492
93+ // 检查字符是否为无效字符,这里定义了无效字符为非英文、非数字和非中文的字符
8594 private boolean isInvalidChar (char c ) {
86- String regex = "[^a-zA-Z0-9一-龥]" ;
87- return Pattern .matches (regex , String .valueOf (c ));
95+ return INVALID_CHAR_PATTERN .matcher (String .valueOf (c )).matches ();
8896 }
8997
98+ // Trie树的节点类,包含子节点、失效链接和结束标记
9099 private static class TrieNode {
91100 HashMap <Character , TrieNode > children ;
92101 TrieNode fail ;
0 commit comments