Skip to content

Commit b5c0097

Browse files
authored
Merge pull request #2 from Doge2077/feature
在 TrieTree.java 文件中添加了部分注释，预编译了正则表达式，并将正则中的中文字符范围还原为 Unicode 转义序列（\u4E00-\u9FA5），以适配不同的编码格式
2 parents 9d2d391 + 69f7c22 commit b5c0097

File tree

1 file changed

+30
-21
lines changed
  • spring-boot-itbaima-robot/src/main/java/net/itbaima/robot/listener/util

1 file changed

+30
-21
lines changed

spring-boot-itbaima-robot/src/main/java/net/itbaima/robot/listener/util/TrieTree.java

Lines changed: 30 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -9,23 +9,29 @@
99
public class TrieTree {
1010
private final TrieNode root;
1111

12+
// 静态成员,预编译的正则表达式
13+
private static final Pattern INVALID_CHAR_PATTERN = Pattern.compile("[^a-zA-Z0-9\u4E00-\u9FA5]");
14+
15+
// 创建一个新的Trie树,根节点为空
1216
public TrieTree() {
1317
root = new TrieNode();
1418
}
1519

20+
// 插入一个新的关键词到Trie树中
1621
public void insert(String word) {
1722
TrieNode node = root;
1823
for (char c : word.toCharArray()) {
19-
node.children.putIfAbsent(c, new TrieNode());
20-
node = node.children.get(c);
24+
node.children.putIfAbsent(c, new TrieNode()); // 如果该字符在当前节点的子节点中不存在,则创建一个新的子节点
25+
node = node.children.get(c); // 移动到下一个子节点
2126
}
22-
node.end = true;
27+
node.end = true; // 标记最后一个字符的节点为结束节点,表示一个完整的关键词
2328
}
2429

30+
// 构建AC自动机的失效链接
2531
public void buildFailureNode() {
2632
Queue<TrieNode> queue = new LinkedList<>();
2733
for (TrieNode child : root.children.values()) {
28-
child.fail = root;
34+
child.fail = root; // 根节点的子节点的失效链接都指向根节点
2935
queue.add(child);
3036
}
3137
while (!queue.isEmpty()) {
@@ -35,58 +41,61 @@ public void buildFailureNode() {
3541
queue.add(child);
3642
TrieNode failNode = current.fail;
3743
while (failNode != null && !failNode.children.containsKey(c))
38-
failNode = failNode.fail;
39-
child.fail = failNode != null ? failNode.children.get(c) : root;
44+
failNode = failNode.fail; // 寻找失效链接的节点
45+
child.fail = failNode != null ? failNode.children.get(c) : root; // 如果找到了失效链接的节点,则指向该节点的对应子节点,否则指向根节点
4046
}
4147
}
4248
}
4349

50+
// 检查文本中是否存在关键词
4451
public boolean checkText(String text) {
4552
TrieNode current = root;
4653
for (char c : text.toCharArray()) {
47-
if (this.isInvalidChar(c)) continue;
54+
if (isInvalidChar(c)) continue; // 如果字符无效,则跳过
4855
while (current != null && !current.children.containsKey(c))
49-
current = current.fail;
56+
current = current.fail; // 如果当前节点的子节点中不存在该字符,则跟随失效链接向上查找
5057
if (current == null) {
51-
current = root;
58+
current = root; // 如果没有找到,则回到根节点并继续查找
5259
continue;
5360
}
54-
current = current.children.get(c);
55-
if (current.end) return true;
61+
current = current.children.get(c); // 如果找到了,则转到下一个子节点
62+
if (current.end) return true; // 如果找到了一个关键词的结束节点,则返回true
5663
}
57-
return false;
64+
return false; // 如果没有找到任何关键词,则返回false
5865
}
5966

67+
// 检查文本中的关键词数量
6068
public int checkTextWithCount(String text) {
6169
Set<TrieNode> nodes = new HashSet<>();
6270
int count = 0;
6371
TrieNode current = root;
6472
for (char c : text.toCharArray()) {
65-
if (this.isInvalidChar(c)) continue;
73+
if (isInvalidChar(c)) continue; // 无效字符直接跳过
6674
while (current != null && !current.children.containsKey(c))
67-
current = current.fail;
75+
current = current.fail; // 如果当前节点的子节点中不存在该字符,则跟随失效链接向上查找
6876
if (current == null) {
69-
current = root;
77+
current = root; // 如果没有找到,则回到根节点并继续查找
7078
continue;
7179
}
72-
current = current.children.get(c);
80+
current = current.children.get(c); // 如果找到了,则转到下一个子节点
7381
TrieNode tmp = current;
7482
while (tmp != null) {
7583
if(tmp.end && !nodes.contains(tmp)) {
76-
nodes.add(tmp);
84+
nodes.add(tmp); // 如果找到了一个关键词的结束节点,并且该节点还没有被计数过,则计数加1
7785
count++;
7886
}
79-
tmp = tmp.fail;
87+
tmp = tmp.fail; // 向上跟随失效链接查找其他可能的关键词
8088
}
8189
}
82-
return count;
90+
return count; // 返回找到的关键词数量
8391
}
8492

93+
// 检查字符是否为无效字符,这里定义了无效字符为非英文、非数字和非中文的字符
8594
private boolean isInvalidChar(char c) {
86-
String regex = "[^a-zA-Z0-9一-龥]";
87-
return Pattern.matches(regex, String.valueOf(c));
95+
return INVALID_CHAR_PATTERN.matcher(String.valueOf(c)).matches();
8896
}
8997

98+
// Trie树的节点类,包含子节点、失效链接和结束标记
9099
private static class TrieNode {
91100
HashMap<Character, TrieNode> children;
92101
TrieNode fail;

0 commit comments

Comments
 (0)