Skip to content

Commit 8e66551

Browse files
Merge pull request #186 from jerryderry/kmp-python
kmp in python
2 parents d869727 + 3fed97d commit 8e66551

File tree

1 file changed

+61
-0
lines changed

1 file changed

+61
-0
lines changed

python/34_kmp/kmp.py

Lines changed: 61 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,61 @@
1+
"""
2+
KMP algorithm
3+
4+
Author: Wenru Dong
5+
"""
6+
7+
from typing import List
8+
9+
def kmp(s: str, pattern: str) -> int:
    """Return the index of the first occurrence of *pattern* in *s*.

    Uses the Knuth-Morris-Pratt algorithm, so the scan over *s* never
    moves backwards and the total work is linear in
    ``len(s) + len(pattern)``.

    Args:
        s: The text to search in.
        pattern: The text to search for.

    Returns:
        The lowest index ``i`` such that ``s[i:i + len(pattern)] == pattern``,
        or ``-1`` if there is no such index.  An empty pattern matches at
        index 0, mirroring ``str.find``.
    """
    m = len(pattern)
    if m == 0:
        # Without this guard the loop below would index pattern[0] and
        # raise IndexError; str.find reports 0 for an empty needle.
        return 0

    partial_match_table = _get_partial_match_table(pattern)
    j = 0  # Number of pattern characters currently matched.
    for i, ch in enumerate(s):
        # On a mismatch fall back to the next shorter border.  The -1
        # sentinel stored at table[0] ends the loop, and the increment
        # below then resets j to 0.
        while j >= 0 and ch != pattern[j]:
            j = partial_match_table[j]
        j += 1
        if j == m:
            return i - m + 1
    return -1


def _get_partial_match_table(pattern: str) -> List[int]:
    """Build the KMP partial-match (failure) table for *pattern*.

    Write π for the table.  For ``i >= 1``, ``π[i]`` is the length of the
    longest proper prefix of ``pattern[:i]`` that is also a suffix of
    ``pattern[:i]``.  For example, ``π[2] == 1`` means that the first two
    characters ``P[0]P[1]`` have a proper prefix of length 1 that is also
    their suffix.  ``π[0]`` is set to ``-1`` as a sentinel so that callers
    can handle the "nothing matched" boundary without a special case.

    The table is filled using the recurrence ``π[i] = πᵏ(i-1) + 1``, where
    ``πᵏ`` denotes π applied k times (``π²(i) = π(π(i))``) and k is the
    smallest count for which the character following that border equals
    ``pattern[i-1]``.

    Args:
        pattern: The pattern to preprocess.

    Returns:
        A list of ``len(pattern) + 1`` integers as described above.
    """
    table = [0] * (len(pattern) + 1)
    table[0] = k = -1  # k tracks the border length being extended.
    for i, ch in enumerate(pattern, start=1):
        # Shrink the candidate border until it can be extended by ch,
        # or until the sentinel says no border is left.
        while k >= 0 and pattern[k] != ch:
            k = table[k]
        k += 1
        table[i] = k
    return table
51+
52+
53+
if __name__ == "__main__":
    # Demo: print our result next to str.find's answer for each case so
    # the two can be compared by eye.
    for s, pattern in (
        ("abc abcdab abcdabcdabde", "bcdabd"),
        ("hello", "ll"),
    ):
        print(kmp(s, pattern), s.find(pattern))

0 commit comments

Comments
 (0)