Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
96 changes: 96 additions & 0 deletions divide_and_conquer/Suffix Array and LCP implementation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,96 @@
from typing import List

Check failure on line 1 in divide_and_conquer/Suffix Array and LCP implementation.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (N999)

divide_and_conquer/Suffix Array and LCP implementation.py:1:1: N999 Invalid module name: 'Suffix Array and LCP implementation'

Check failure on line 1 in divide_and_conquer/Suffix Array and LCP implementation.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (UP035)

divide_and_conquer/Suffix Array and LCP implementation.py:1:1: UP035 `typing.List` is deprecated, use `list` instead


class SuffixArray:
    """
    Suffix array and LCP (longest common prefix) array of a string.

    Both arrays are built once, when the instance is created, and stored as
    the ``suffix_array`` and ``lcp_array`` attributes.

    Example:
    >>> SuffixArray("").suffix_array
    []
    """

    def __init__(self, text: str) -> None:
        """
        Stores the input text and builds its suffix and LCP arrays.
        """
        self.text = text
        self.suffix_array = self.build_suffix_array()
        self.lcp_array = self.build_lcp_array()

    def build_suffix_array(self) -> list[int]:
        """
        Builds the suffix array for the input string.

        Returns:
            The starting indices of all suffixes of ``self.text``, ordered so
            that the suffixes they denote are in sorted order.

        Example:
        >>> sa = SuffixArray("banana")
        >>> sa.suffix_array
        [5, 3, 1, 0, 4, 2]
        """
        text = self.text
        # Each sort key is a full suffix slice: simple, but every key copies
        # up to len(text) characters, so this is meant for short inputs.
        return sorted(range(len(text)), key=lambda start: text[start:])

    def build_lcp_array(self) -> list[int]:
        """
        Builds the LCP (Longest Common Prefix) array for the suffix array.

        ``lcp[k]`` is the length of the longest common prefix of the suffixes
        starting at ``suffix_array[k]`` and ``suffix_array[k - 1]``.
        ``lcp[0]`` is always 0 because the first suffix has no predecessor.

        Reads ``self.suffix_array``, so the suffix array must be built first.

        Returns:
            A list with the same length as the text.

        Example:
        >>> sa = SuffixArray("banana")
        >>> sa.lcp_array
        [0, 1, 3, 0, 0, 2]
        """
        text = self.text
        n = len(text)
        suffix_array = self.suffix_array

        # rank[start] is the position in the suffix array of the suffix that
        # begins at text index ``start`` (the inverse of suffix_array).
        rank = [0] * n
        for position, start in enumerate(suffix_array):
            rank[start] = position

        lcp = [0] * n
        # Kasai's algorithm: walk the suffixes in text order and carry the
        # previous match length forward, since it can shrink by at most one
        # when moving from the suffix at i to the suffix at i + 1.
        h = 0
        for i in range(n):
            if rank[i] == 0:
                # The smallest suffix has no predecessor; its entry stays 0.
                continue
            j = suffix_array[rank[i] - 1]
            while i + h < n and j + h < n and text[i + h] == text[j + h]:
                h += 1
            lcp[rank[i]] = h
            if h > 0:
                h -= 1
        return lcp

    def display(self) -> None:
        """
        Displays the suffix array and LCP array for the input string.

        Example:
        >>> sa = SuffixArray("banana")
        >>> sa.display()
        Suffix Array:
        5: a
        3: ana
        1: anana
        0: banana
        4: na
        2: nana
        <BLANKLINE>
        LCP Array:
        LCP between a and ana: 1
        LCP between ana and anana: 3
        LCP between anana and banana: 0
        LCP between banana and na: 0
        LCP between na and nana: 2
        """
        print("Suffix Array:")
        for start in self.suffix_array:
            print(f"{start}: {self.text[start:]}")

        print("\nLCP Array:")
        # Entry 0 has no preceding suffix to compare with, so start at 1.
        for i in range(1, len(self.lcp_array)):
            previous = self.text[self.suffix_array[i - 1] :]
            current = self.text[self.suffix_array[i] :]
            print(f"LCP between {previous} and {current}: {self.lcp_array[i]}")


# Demo: print the suffix and LCP arrays of a sample word when run as a script.
if __name__ == "__main__":
    SuffixArray("banana").display()
Loading