Skip to content
Closed
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
82 changes: 82 additions & 0 deletions divide_and_conquer/suffix_array_lcp.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,82 @@
#!/usr/bin/env python3

import doctest

Check failure on line 3 in divide_and_conquer/suffix_array_lcp.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (F401)

divide_and_conquer/suffix_array_lcp.py:3:8: F401 `doctest` imported but unused


def build_suffix_array(input_string: str) -> list[int]:
    """
    Build the suffix array of ``input_string``.

    The suffix array lists the starting index of every suffix of the string,
    ordered so that the suffixes themselves are in ascending lexicographic
    order.  Suffixes are compared directly as Python strings, so this is the
    straightforward construction rather than an asymptotically optimal one.

    Parameters:
        input_string (str): The input string.

    Returns:
        list[int]: The suffix array (a list of starting indices of
        suffixes in sorted order).  Empty for an empty string.

    Examples:
    >>> build_suffix_array("banana")
    [5, 3, 1, 0, 4, 2]
    >>> build_suffix_array("aaa")
    [2, 1, 0]
    >>> build_suffix_array("")
    []
    """
    # Every suffix has a different length, so all suffixes are distinct and
    # sorting the start indices by their suffix never has ties to break.
    return sorted(range(len(input_string)), key=lambda start: input_string[start:])


def build_lcp_array(input_string: str, suffix_array: list[int]) -> list[int]:
    """
    Build the LCP (longest common prefix) array for a string and its suffix array.

    ``lcp[k]`` is the length of the longest common prefix of the suffixes that
    start at ``suffix_array[k - 1]`` and ``suffix_array[k]``; ``lcp[0]`` is 0
    because the first suffix in sorted order has no predecessor.

    The suffixes are visited in text order (start index 0, 1, 2, ...) and the
    match length found for one suffix, minus one, is reused as the starting
    point for the next, so characters already known to match are not compared
    again.

    Parameters:
        input_string (str): The input string.
        suffix_array (list[int]): The suffix array of ``input_string``.

    Returns:
        list[int]: The LCP array, with the same length as ``input_string``.

    Raises:
        ValueError: If ``suffix_array`` does not have one entry per character
            of ``input_string``.

    Examples:
    >>> build_lcp_array("banana", [5, 3, 1, 0, 4, 2])
    [0, 1, 3, 0, 0, 2]
    >>> build_lcp_array("aaaa", [3, 2, 1, 0])
    [0, 1, 2, 3]
    >>> build_lcp_array("", [])
    []
    >>> build_lcp_array("banana", [0, 1])
    Traceback (most recent call last):
        ...
    ValueError: suffix_array must have the same length as input_string
    """
    n = len(input_string)
    if len(suffix_array) != n:
        raise ValueError("suffix_array must have the same length as input_string")

    # rank[start] is the position of the suffix beginning at `start` within
    # the suffix array, i.e. the inverse permutation of suffix_array.
    rank = [0] * n
    for position, start in enumerate(suffix_array):
        rank[start] = position

    lcp = [0] * n
    h = 0  # match length carried over from the previous text position
    for i in range(n):
        if rank[i] == 0:
            # The smallest suffix has no predecessor to compare against,
            # so nothing can be carried forward from it.
            h = 0
            continue

        j = suffix_array[rank[i] - 1]  # start of the preceding sorted suffix
        while i + h < n and j + h < n and input_string[i + h] == input_string[j + h]:
            h += 1
        lcp[rank[i]] = h

        # Dropping the first character of both suffixes keeps at least
        # h - 1 matching characters for the suffix starting at i + 1.
        if h > 0:
            h -= 1
    return lcp


# Example usage
if __name__ == "__main__":
    # Run the doctest examples embedded in this module's docstrings.
    doctest.testmod()

    test_string = "banana"
    suffix_array = build_suffix_array(test_string)
    lcp_array = build_lcp_array(test_string, suffix_array)

    print("Suffix Array:")
    for start in suffix_array:
        print(f"{start}: {test_string[start:]}")

    print("\nLCP Array:")
    # lcp_array[i] compares the suffixes at sorted positions i - 1 and i, so
    # position 0 (which has no predecessor) is skipped.
    for i in range(1, len(lcp_array)):
        previous_suffix = test_string[suffix_array[i - 1] :]
        current_suffix = test_string[suffix_array[i] :]
        print(f"LCP between {previous_suffix} and {current_suffix}: {lcp_array[i]}")
Loading