Skip to content

Commit 27fb07f

Browse files
committed
feat: Implement and test Bitap algorithm for exact string matching
- Added the Bitap (Shift-Or) algorithm for efficient exact string matching.
- Included comprehensive test cases to validate the implementation.
- Fixed minor issues in the Bitap algorithm logic and improved readability.
- Ensured the algorithm handles edge cases: an empty pattern matches at index 0, and patterns longer than 64 characters are rejected with a ValueError.
1 parent ed88315 commit 27fb07f

File tree

4 files changed

+45
-4
lines changed

4 files changed

+45
-4
lines changed
Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
11
Algorithms
22
==========
33

4-
.. autofunction:: pydatastructs.find
4+
.. autofunction:: pydatastructs.find
5+
.. autofunction:: pydatastructs.bitap_search

pydatastructs/strings/__init__.py

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -12,7 +12,8 @@
1212
__all__.extend(trie.__all__)
1313

1414
from .algorithms import (
15-
find
15+
find,
16+
bitap_search
1617
)
1718

1819
__all__.extend(algorithms.__all__)

pydatastructs/strings/algorithms.py

Lines changed: 30 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4,7 +4,8 @@
44
Backend, raise_if_backend_is_not_python)
55

66
__all__ = [
7-
'find'
7+
'find',
8+
'bitap_search'
89
]
910

1011
PRIME_NUMBER, MOD = 257, 1000000007
@@ -83,6 +84,34 @@ def find(text, query, algorithm, **kwargs):
8384
%(algorithm))
8485
return getattr(algorithms, func)(text, query)
8586

87+
def bitap_search(text: str, pattern: str) -> int:
    """
    Find the first exact occurrence of ``pattern`` in ``text`` using the
    Bitap (Shift-Or) algorithm.

    The search keeps one integer of state in which bit ``j`` is 0 exactly
    when ``pattern[:j + 1]`` matches the text ending at the current
    character. A match is found as soon as bit ``len(pattern) - 1`` is 0.

    Parameters
    ----------
    text : str
        The string to search in.
    pattern : str
        The string to search for.

    Returns
    -------
    int
        Index in ``text`` where the first occurrence of ``pattern`` starts,
        ``0`` when ``pattern`` is empty, or ``-1`` when there is no
        occurrence.

    Raises
    ------
    ValueError
        If ``pattern`` is longer than 64 characters.

    Examples
    --------
    >>> bitap_search("hello world", "world")
    6
    >>> bitap_search("abcdef", "gh")
    -1
    """
    m = len(pattern)
    if m == 0:
        return 0
    if m > 64:
        # Python integers are arbitrary precision, so the algorithm below
        # does not itself need this cap; it is kept so existing callers
        # keep seeing the same ValueError.
        raise ValueError("Bitap algorithm supports patterns up to 64 characters.")

    # pattern_mask[c] has bit i cleared exactly when pattern[i] == c;
    # every other bit (including all bits >= m) is set.
    pattern_mask = {}
    for i, char in enumerate(pattern):
        pattern_mask[char] = pattern_mask.get(char, ~0) & ~(1 << i)

    no_match_mask = ~0           # mask for characters absent from the pattern
    match_bit = 1 << (m - 1)     # bit that is 0 once the whole pattern matched
    state = ~0                   # all ones: no prefix matched yet

    for i, char in enumerate(text):
        # The shift brings in a 0 at bit 0 ("the empty prefix always
        # matches"); OR-ing with the mask keeps bit j at 0 only if the
        # previous prefix matched AND pattern[j] equals the current char.
        state = (state << 1) | pattern_mask.get(char, no_match_mask)
        if (state & match_bit) == 0:
            return i - m + 1

    return -1
86115

87116
def _knuth_morris_pratt(text, query):
88117
if len(text) == 0 or len(query) == 0:

pydatastructs/strings/tests/test_algorithms.py

Lines changed: 11 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
from pydatastructs.strings import find
1+
from pydatastructs.strings import find, bitap_search
22

33
import random, string
44

@@ -14,6 +14,16 @@ def test_bm():
1414
def test_zf():
1515
_test_common_string_matching('z_function')
1616

17+
def test_bitap_search():
    """Check bitap_search on basic, overlapping and edge-case inputs."""
    # Basic hits and misses.
    assert bitap_search("hello world", "world") == 6
    assert bitap_search("abcdef", "def") == 3
    assert bitap_search("abcdef", "gh") == -1
    # Overlapping / repeated characters: the first occurrence wins.
    assert bitap_search("aaaaa", "aa") == 0
    assert bitap_search("abababab", "bab") == 1
    # A partial prefix match must not be reported as a full match.
    assert bitap_search("aaab", "aab") == 1
    assert bitap_search("ab", "ba") == -1
    # Pattern longer than the text can never match.
    assert bitap_search("ab", "abc") == -1
    # Empty text and empty pattern.
    assert bitap_search("", "a") == -1
    assert bitap_search("a", "") == 0
26+
1727
def _test_common_string_matching(algorithm):
1828
true_text_pattern_dictionary = {
1929
"Knuth-Morris-Pratt": "-Morris-",

0 commit comments

Comments
 (0)