Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 33 additions & 16 deletions strings/boyer_moore_search.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,30 +11,42 @@
a shift is proposed that moves the entirety of Pattern past
the point of mismatch in the text.

If there is no mismatch then the pattern matches the text block.

Time Complexity : O(n/m) in the best case, O(n*m) in the worst case
n=length of main string
m=length of pattern string
"""

from __future__ import annotations


class BoyerMooreSearch:
"""
Example usage:

bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
positions = bms.bad_character_heuristic()

where 'positions' contains the locations where the pattern was matched.
"""

def __init__(self, text: str, pattern: str):
# Store the text and pattern together with their lengths; the search
# methods of this class read self.patLen and self.textLen as loop bounds.
self.text, self.pattern = text, pattern
self.textLen, self.patLen = len(text), len(pattern)

def match_in_pattern(self, char: str) -> int:
"""finds the index of char in pattern in reverse order
"""
Finds the index of char in pattern in reverse order.

Parameters :
char (str): character to be searched

Returns :
i (int): index of the last occurrence of char in pattern
-1 (int): if char is not found in pattern

>>> bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
>>> bms.match_in_pattern("B")
1
"""

for i in range(self.patLen - 1, -1, -1):
Expand All @@ -44,15 +56,19 @@ def match_in_pattern(self, char: str) -> int:

def mismatch_in_text(self, current_pos: int) -> int:
"""
find the index of mis-matched character in text when compared with pattern
from last
Find the index of the mismatched character in text when the pattern is compared
with it from the last character backwards.

Parameters :
current_pos (int): current index position of text

Returns :
i (int): index in text of the first mismatch found scanning from the pattern's end
-1 (int): if there is no mismatch between pattern and text block

>>> bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
>>> bms.mismatch_in_text(2)
3
"""

for i in range(self.patLen - 1, -1, -1):
Expand All @@ -61,7 +77,14 @@ def mismatch_in_text(self, current_pos: int) -> int:
return -1

def bad_character_heuristic(self) -> list[int]:
# searches pattern in text and returns index positions
"""
Finds all positions in the text where the pattern occurs.

>>> bms = BoyerMooreSearch(text="ABAABA", pattern="AB")
>>> bms.bad_character_heuristic()
[0, 3]
"""

positions = []
for i in range(self.textLen - self.patLen + 1):
mismatch_index = self.mismatch_in_text(i)
Expand All @@ -75,13 +98,7 @@ def bad_character_heuristic(self) -> list[int]:
return positions


text = "ABAABA"
pattern = "AB"
bms = BoyerMooreSearch(text, pattern)
positions = bms.bad_character_heuristic()
if __name__ == "__main__":
import doctest

if len(positions) == 0:
print("No match found")
else:
print("Pattern found in following positions: ")
print(positions)
doctest.testmod()