Skip to content

Commit 5a064ff

Browse files
committed
bm in python
1 parent 17bbd62 commit 5a064ff

File tree

1 file changed

+120
-0
lines changed

1 file changed

+120
-0
lines changed

python/33_bm/bm_.py

Lines changed: 120 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,120 @@
1+
#!/usr/bin/python
2+
# -*- coding: UTF-8 -*-
3+
4+
# Number of slots for a list-based lookup table indexed by ord(char);
# 256 slots cover code points 0-255 only.
SIZE = 256
5+
6+
7+
def bm(main, pattern):
    """
    Find the first occurrence of ``pattern`` in ``main`` with Boyer-Moore.

    The pattern is compared against the text from right to left. After a
    mismatch the pattern is shifted by the larger of the distances given by:
    1. the bad-character rule
    2. the good-suffix rule

    :param main: text to search in (``str``)
    :param pattern: text to search for (``str``)
    :return: index of the first match in ``main``, or -1 if there is none
    """
    assert isinstance(main, str) and isinstance(pattern, str)
    n, m = len(main), len(pattern)

    # A text that is not longer than the pattern can only match by equality.
    if n <= m:
        return 0 if main == pattern else -1

    # Bad-character table: code point -> last index of that char in pattern.
    # A dict is used instead of a fixed 256-slot list so that characters with
    # code points above 255 do not raise IndexError; absent chars count as -1.
    bc = {}
    generate_bc(pattern, m, bc)

    # Good-suffix tables, filled in place by generate_gs.
    suffix = [-1] * m
    prefix = [False] * m
    generate_gs(pattern, m, suffix, prefix)

    i = 0  # index in main where the pattern is currently aligned
    while i <= n - m:
        # Scan right to left; j stops on the bad character (or -1 on a match).
        j = m - 1
        while j >= 0 and main[i + j] == pattern[j]:
            j -= 1

        # The whole pattern matched at this alignment.
        if j < 0:
            return i

        # 1. Shift suggested by the bad-character rule (may be <= 0).
        x = j - bc.get(ord(main[i + j]), -1)

        # 2. Shift suggested by the good-suffix rule; it only applies when at
        #    least one trailing character matched.
        y = 0
        if j < m - 1:
            y = move_by_gs(j, m, suffix, prefix)

        i += max(x, y)

    return -1
56+
57+
58+
def generate_bc(pattern, m, bc):
    """
    Fill the bad-character lookup table in place.

    For every index below ``m`` the entry keyed by the character's code point
    is set to that index, so later occurrences overwrite earlier ones and each
    entry ends up holding the last position of its character.

    :param pattern: the pattern string
    :param m: number of leading characters of ``pattern`` to record
    :param bc: mutable table indexed by ``ord(char)``; modified in place
    :return: None
    """
    position = 0
    while position < m:
        code_point = ord(pattern[position])
        bc[code_point] = position
        position += 1
68+
69+
70+
def generate_gs(pattern, m, suffix, prefix):
    """
    Pre-process the pattern for the good-suffix rule, filling both tables
    in place.

    For each prefix ``pattern[:end + 1]`` (excluding the whole pattern) the
    common suffix it shares with ``pattern`` is measured character by
    character. For every common-suffix length ``k`` reached:
    ``suffix[k]`` is set to the start index of that occurrence (later
    prefixes overwrite earlier ones), and ``prefix[k]`` is set to True when
    the occurrence starts at index 0.

    :param pattern: the pattern string
    :param m: length of the pattern
    :param suffix: list of length ``m``; modified in place
    :param prefix: list of length ``m``; modified in place
    :return: None
    """
    last = m - 1
    for end in range(last):
        start = end
        matched = 0  # length of the common suffix found so far
        # Walk backwards while pattern[:end + 1] and pattern agree at the tail.
        while start >= 0 and pattern[start] == pattern[last - matched]:
            matched += 1
            suffix[matched] = start
            start -= 1
        # Running off the front means the common suffix is also a prefix.
        if start < 0:
            prefix[matched] = True
89+
90+
91+
def move_by_gs(j, m, suffix, prefix):
    """
    Compute the shift distance given by the good-suffix rule.

    Three cases are handled, in order:
    1. the whole good suffix occurs again elsewhere in the pattern
    2. some trailing part of the good suffix matches a prefix of the pattern
    3. neither of the above: shift by the full pattern length

    :param j: index of the bad character in the pattern (scanning right to left)
    :param m: length of the pattern
    :param suffix: table where ``suffix[k]`` is the start of another occurrence
        of the length-``k`` suffix, or -1
    :param prefix: table where ``prefix[k]`` tells whether the length-``k``
        suffix is also a prefix
    :return: number of positions to shift the pattern
    """
    good_len = m - 1 - j  # length of the suffix matched before the bad char
    occurrence = suffix[good_len]

    # Case 1: align the other occurrence of the good suffix with the text.
    if occurrence != -1:
        return j - occurrence + 1

    # Case 2: try shorter trailing parts, longest first; case 3 falls back to m.
    candidates = (shift for shift in range(j + 2, m) if prefix[m - shift])
    return next(candidates, m)
113+
114+
115+
if __name__ == '__main__':
    # Demo: print the index found by str.find and by bm for the same sample.
    print('--- search ---')
    sample_text = 'dfasdeeeetewtweyyyhtruuueyytewtweyyhtrhrth'
    sample_pattern = 'eyytewtweyy'
    builtin_result = sample_text.find(sample_pattern)
    print('[Built-in Functions] result:', builtin_result)
    bm_result = bm(sample_text, sample_pattern)
    print('[bm] result:', bm_result)

0 commit comments

Comments
 (0)