Skip to content

Commit ea105e0

Browse files
Merge pull request #199 from KPatr1ck/ac_automata
ac自动机-python
2 parents a2eda83 + 9fd6594 commit ea105e0

File tree

2 files changed

+95
-2
lines changed

2 files changed

+95
-2
lines changed

python/35_trie/trie_.py

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -15,16 +15,18 @@ def __init__(self, c):
1515
self.children = []
1616

1717
def insert_child(self, c):
    """Wrap ``c`` in a new ``Node`` and insert it as a child of this node.

    Placement among the existing children is delegated to ``_insert_child``.

    :param c: character stored in the new child node
    :return: None
    """
    child = Node(c)
    self._insert_child(child)
19+
20+
def _insert_child(self, node):
1821
"""
1922
插入一个子节点
2023
:param c:
2124
:return:
2225
"""
23-
v = ord(c)
26+
v = ord(node.data)
2427
idx = self._find_insert_idx(v)
2528
length = len(self.children)
2629

27-
node = Node(c)
2830
if idx == length:
2931
self.children.append(node)
3032
else:
@@ -33,6 +35,9 @@ def insert_child(self, c):
3335
self.children[i] = self.children[i-1]
3436
self.children[idx] = node
3537

38+
def has_child(self, c):
    """Report whether ``get_child(c)`` finds a child node.

    :param c: character passed through to ``get_child``
    :return: True when ``get_child(c)`` returns something other than None,
        otherwise False
    """
    # ``is not None`` already evaluates to a bool, so no conditional
    # expression is needed around it.
    return self.get_child(c) is not None
40+
3641
def get_child(self, c):
3742
"""
3843
搜索子节点并返回
Lines changed: 88 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,88 @@
1+
#!/usr/bin/python
2+
# -*- coding: UTF-8 -*-
3+
4+
from trie_ import Node, Trie
5+
from queue import Queue
6+
7+
8+
class ACNode(Node):
    """Trie node carrying the extra fields used by the AC automaton."""

    def __init__(self, c: str):
        super().__init__(c)
        # Failure link; None until build_failure_pointer assigns it.
        self.fail = None
        # Overwritten by build_failure_pointer with parent length + 1
        # (the root keeps 0).
        self.length = 0

    def insert_child(self, c: str):
        """Insert a child for ``c`` as an ``ACNode`` rather than a plain ``Node``.

        :param c: character stored in the new child node
        :return: None
        """
        child = ACNode(c)
        self._insert_child(child)
16+
17+
18+
class ACTrie(Trie):
    """Trie whose root node is an ``ACNode``."""

    def __init__(self):
        # NOTE: Trie.__init__ is not called here; this constructor only
        # sets ``root``.
        root_node = ACNode(None)
        self.root = root_node
21+
22+
23+
def ac_automata(main: str, ac_trie: ACTrie) -> list:
    """Find every occurrence of the trie's patterns inside ``main``.

    Failure pointers are rebuilt on every call through
    ``build_failure_pointer`` before the scan starts.

    :param main: text to scan
    :param ac_trie: trie holding the patterns to search for
    :return: list of ``(start, end)`` index pairs into ``main``, both ends
        inclusive, in the order the matches are discovered
    """
    root = ac_trie.root
    build_failure_pointer(ac_trie)

    matches = []
    state = root
    for i, c in enumerate(main):
        # Look the child up once per visited state instead of calling
        # has_child() and then get_child() again for the same character.
        child = state.get_child(c)
        while child is None and state is not root:
            state = state.fail
            child = state.get_child(c)

        if child is None:
            # Not even the root has an edge for ``c``; stay at the root.
            continue

        # A character matched: follow the failure chain so that every
        # pattern ending at index ``i`` is reported.
        node = child
        while node is not root:
            if node.is_ending_char:
                matches.append((i - node.length + 1, i))
            node = node.fail
        state = child

    return matches
43+
44+
45+
def build_failure_pointer(ac_trie: ACTrie) -> None:
    """Assign ``fail`` and ``length`` to every node of ``ac_trie``.

    Nodes are visited breadth-first from the root. Each child gets
    ``length = parent.length + 1``. Children of the root fail to the root;
    any other child fails to the node reached by following its parent's
    failure chain until a node with an edge for the child's character is
    found, or to the root when no such node exists.

    :param ac_trie: trie whose nodes are updated in place
    :return: None
    """
    # Imported locally so this function does not depend on the module's
    # import block. A plain deque is enough for a single-threaded BFS;
    # queue.Queue adds locking that is not needed here.
    from collections import deque

    root = ac_trie.root
    root.fail = None

    pending = deque([root])
    while pending:
        parent = pending.popleft()
        for child in parent.children:
            # The parent's length is already final when it is dequeued,
            # so it does not need to travel alongside the node in the queue.
            child.length = parent.length + 1
            if parent is root:
                child.fail = root
            else:
                # Same idea as KMP: fall back along failure links until a
                # node with an edge for child.data is found or the root is
                # reached.
                candidate = parent.fail
                target = candidate.get_child(child.data)
                while target is None and candidate is not root:
                    candidate = candidate.fail
                    target = candidate.get_child(child.data)

                # Either an edge was found (target is a node), or the
                # search ended at the root without one.
                child.fail = target if target is not None else root
            pending.append(child)
73+
74+
75+
if __name__ == '__main__':
    # Demo: build a trie of words to filter, scan a sample sentence and
    # print it with every matched range masked by asterisks.
    ac_trie = ACTrie()
    ac_trie.gen_tree(['fuck', 'shit', 'TMD', '傻叉'])

    print('--- ac automata ---')
    m_str = 'fuck you, what is that shit, TMD你就是个傻叉傻叉傻叉叉'
    print('original str : {}'.format(m_str))

    filter_range_list = ac_automata(m_str, ac_trie)
    # Mask by position. str.replace() substitutes by content across the
    # whole string, so it ignores the (start, end) indices that were
    # actually reported by the automaton.
    masked_chars = list(m_str)
    for start, end in filter_range_list:
        masked_chars[start:end + 1] = '*' * (end + 1 - start)
    str_filtered = ''.join(masked_chars)

    print('after filtered: {}'.format(str_filtered))

0 commit comments

Comments
 (0)