Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 6 additions & 0 deletions DIRECTORY.md
Original file line number Diff line number Diff line change
Expand Up @@ -313,6 +313,10 @@
* [Node](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/ternary/node.py)
* [Test Ternary Tree Paths](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/ternary/test_ternary_tree_paths.py)
* Trie
* Suffix
* [Suffix Tree](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/suffix/suffix_tree.py)
* [Suffix Tree Node](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/suffix/suffix_tree_node.py)
* [Types](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/suffix/types.py)
* [Trie](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/trie.py)
* [Trie Node](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/trie_node.py)
* Tuples
Expand Down Expand Up @@ -719,6 +723,8 @@
* [Test Is Unique](https://github.com/BrianLusina/PythonSnips/blob/master/pystrings/is_unique/test_is_unique.py)
* Issubsequence
* [Test Is Subsequence](https://github.com/BrianLusina/PythonSnips/blob/master/pystrings/issubsequence/test_is_subsequence.py)
* Longest Common Suffix Queries
* [Test Longest Common Suffix Queries](https://github.com/BrianLusina/PythonSnips/blob/master/pystrings/longest_common_suffix_queries/test_longest_common_suffix_queries.py)
* Longest Self Contained Substring
* [Test Longest Self Contained Substring](https://github.com/BrianLusina/PythonSnips/blob/master/pystrings/longest_self_contained_substring/test_longest_self_contained_substring.py)
* Look And Say Sequence
Expand Down
14 changes: 14 additions & 0 deletions datastructures/trees/binary/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -8,6 +8,20 @@

class LowestCommonAncestorTestCase(unittest.TestCase):
def test_1(self):
"""
Test to ensure that the lowest common ancestor function returns the correct node for a given binary tree.

The binary tree is structured as follows:
10
/ \
11 22
/ \ / \
6 5 19 14
/ \
13 15

The function should return the node with value 5, given the nodes with values 13 and 15.
"""
root = BinaryTreeNode(data=10)

# left subtree
Expand Down
5 changes: 3 additions & 2 deletions datastructures/trees/trie/__init__.py
Original file line number Diff line number Diff line change
@@ -1,5 +1,6 @@
from datastructures.trees.trie.trie_node import TrieNode
from datastructures.trees.trie.trie import Trie
from datastructures.trees.trie.suffix.suffix_tree_node import SuffixTreeNode
from datastructures.trees.trie.suffix.suffix_tree import SuffixTree


__all__ = ["Trie", "TrieNode"]
__all__ = ["Trie", "TrieNode", "SuffixTreeNode", "SuffixTree"]
7 changes: 7 additions & 0 deletions datastructures/trees/trie/suffix/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
from datastructures.trees.trie.suffix.suffix_tree_node import SuffixTreeNode
from datastructures.trees.trie.suffix.suffix_tree import SuffixTree

__all__ = [
"SuffixTree",
"SuffixTreeNode",
]
84 changes: 84 additions & 0 deletions datastructures/trees/trie/suffix/suffix_tree.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,84 @@
from datastructures.trees.trie.suffix.suffix_tree_node import SuffixTreeNode
from datastructures.trees.trie.suffix.types import WordInfo


class SuffixTree:
    """
    Trie over reversed words, used to answer "longest common suffix" queries.

    Every word is inserted back to front, so a path starting at the root spells
    a suffix of the inserted words. Each node keeps the best WordInfo
    (length, original_index) among all words whose reversed form passes through
    it: the shortest word wins, and ties go to the smallest original index.
    """

    def __init__(self) -> None:
        super().__init__()
        self.root = SuffixTreeNode()

    @staticmethod
    def _update_best_info(current_info: "WordInfo", new_info: "WordInfo") -> "WordInfo":
        """
        Return whichever WordInfo wins the tie-breaking rules.

        Rules: 1. Smallest length wins. 2. Earliest index wins if lengths are equal.
        When both infos are equal, ``current_info`` is kept.
        """
        # WordInfo is (length, index); tuples compare lexicographically, which is
        # exactly "shorter length first, then smaller index".
        return new_info if new_info < current_info else current_info

    def insert(self, word: str, original_index: int) -> None:
        """
        Insert ``word`` (reversed) and refresh ``best_info`` on every node of its path.

        :param word: the word to store
        :param original_index: position of ``word`` in the caller's container; this
            is the value later returned by ``search_best_index``
        """
        # The length of the original word is the primary sorting key
        new_info: "WordInfo" = (len(word), original_index)

        node = self.root
        # Every word passes through the root, so the root tracks the overall best
        node.best_info = self._update_best_info(node.best_info, new_info)

        # Walk the word back to front so that shared suffixes share a path
        for char in reversed(word):
            if char not in node.children:
                node.children[char] = SuffixTreeNode()
            node = node.children[char]
            node.best_info = self._update_best_info(node.best_info, new_info)

    def search_best_index(self, query_word: str) -> int:
        """
        Find the index of the best match for ``query_word``.

        The answer is stored on the deepest node reachable by following the
        reversed query, i.e. the node for the longest suffix the query shares
        with any inserted word. If not even the last character matches, the root
        is used, which holds the best word overall.

        :param query_word: the word whose suffix should be matched
        :return: the ``original_index`` of the best word, or -1 when no word has
            been inserted into the tree yet
        """
        node = self.root

        for char in reversed(query_word):
            if char not in node.children:
                # No more characters match: the longest common suffix is found
                break
            # Each step down represents a longer common suffix
            node = node.children[char]

        best_index = node.best_info[1]
        # An empty tree still carries the (inf, inf) sentinel on its root; report
        # "no match" as -1 rather than leaking a float through an int-typed API.
        if best_index == float("inf"):
            return -1
        return best_index
21 changes: 21 additions & 0 deletions datastructures/trees/trie/suffix/suffix_tree_node.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from typing import DefaultDict, Tuple
from collections import defaultdict
from datastructures.trees.trie.trie_node import TrieNode
from datastructures.trees.trie.suffix.types import WordInfo, INF_WORD_INFO


class SuffixTreeNode(TrieNode):
    """
    A single node of the suffix trie.

    On top of whatever TrieNode sets up, a node holds its child nodes keyed by
    character and the best WordInfo (shortest word, then earliest index) seen
    for this node.
    """

    def __init__(self):
        super().__init__()
        # Best (length, index) pair for any word that passes through or ends at
        # this node; starts at the "infinite" sentinel so the first word wins.
        self.best_info: WordInfo = INF_WORD_INFO
        # Child nodes keyed by character; a missing key creates a fresh node.
        self.children: DefaultDict[str, SuffixTreeNode] = defaultdict(SuffixTreeNode)
        # Index of the best word passing through this node; -1 until set.
        # NOTE(review): nothing in the code visible here updates this field —
        # confirm whether it is still needed alongside best_info.
        self.best_index = -1

6 changes: 6 additions & 0 deletions datastructures/trees/trie/suffix/types.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
from typing import DefaultDict, Tuple

# Type alias for the best word info: (length, original_index)
WordInfo = Tuple[int, int]
# Sentinel meaning "no word recorded yet". Both fields are infinite, so any real
# (length, index) pair compares smaller and the first inserted word always wins.
# NOTE(review): the values are floats (float('inf')), which does not strictly
# match the Tuple[int, int] alias — confirm whether the alias should be widened.
INF_WORD_INFO: WordInfo = (float('inf'), float('inf'))
27 changes: 27 additions & 0 deletions pystrings/longest_common_suffix_queries/README.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
# Longest Common Suffix Queries

You are given two arrays of strings, wordsContainer and wordsQuery.
For each string wordsQuery[i], find the string in wordsContainer that shares the longest common suffix with it.
- If multiple strings in wordsContainer share the same longest suffix, choose the one with the smallest length.
- If two or more such strings have the same smallest length, choose the string that appears earliest in wordsContainer.

Return an array of integers ans, where ans[i] is the index of the chosen string in wordsContainer for the query
wordsQuery[i].

Constraints

- 1 ≤ wordsContainer.length, wordsQuery.length ≤ 10^4
- 1 ≤ wordsContainer[i].length ≤ 5 * 10^3
- 1 ≤ wordsQuery[i].length ≤ 5 * 10^3
- wordsContainer[i] consists only of lowercase English letters.
- wordsQuery[i] consists only of lowercase English letters.
- The sum of wordsContainer[i].length is at most 5 * 10^5
- The sum of wordsQuery[i].length is at most 5 * 10^5

## Examples

![Example 1](./images/examples/longest_common_suffix_queries_1.png)
![Example 2](./images/examples/longest_common_suffix_queries_2.png)
![Example 3](./images/examples/longest_common_suffix_queries_3.png)
![Example 4](./images/examples/longest_common_suffix_queries_4.png)
![Example 5](./images/examples/longest_common_suffix_queries_5.png)
21 changes: 21 additions & 0 deletions pystrings/longest_common_suffix_queries/__init__.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
from typing import List
from datastructures.trees.trie import SuffixTree


def longest_common_suffix_queries(
    words_container: List[str], words_query: List[str]
) -> List[int]:
    """
    Answer each query with the index of its best-matching container word.

    All container words are loaded into a SuffixTree together with their
    position in ``words_container``; every query is then looked up in that tree.

    :param words_container: candidate words, indexed by their list position
    :param words_query: words to match against the container
    :return: one container index per query, in query order
    """
    suffix_tree = SuffixTree()

    # Register every candidate under its position in the container
    for position, candidate in enumerate(words_container):
        suffix_tree.insert(candidate, position)

    # One lookup per query, preserving the order of the queries
    return [suffix_tree.search_best_index(query) for query in words_query]
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Loading
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
Original file line number Diff line number Diff line change
@@ -0,0 +1,98 @@
import unittest
from . import longest_common_suffix_queries


class LongestCommonSuffixQueriesTestCase(unittest.TestCase):
    """
    Checks longest_common_suffix_queries against hand-computed answers.

    Each expected index is the container word sharing the longest suffix with
    the query; ties go to the shortest word, then to the earliest index. A query
    with no common suffix falls back to the shortest, earliest container word.
    """

    def test_1(self):
        """should return [1,1,1] for words_container=["mango","ango","xango"] and words_query=["go","ango","xyz"]"""
        words_container = ["mango", "ango", "xango"]
        words_query = ["go", "ango", "xyz"]
        expected = [1, 1, 1]
        actual = longest_common_suffix_queries(
            words_container=words_container, words_query=words_query
        )
        self.assertEqual(expected, actual)

    def test_2(self):
        """should return [1,1,1] for words_container=["flight", "night", "tight", "light"] and words_query=["ight","t","zzz"]"""
        words_container = ["flight", "night", "tight", "light"]
        words_query = ["ight", "t", "zzz"]
        # "night" is the earliest of the shortest (length 5) words
        expected = [1, 1, 1]
        actual = longest_common_suffix_queries(
            words_container=words_container, words_query=words_query
        )
        self.assertEqual(expected, actual)

    def test_3(self):
        """should return [1,1,1] for words_container=["hello", "yellow", "mellow", "fellow"] and words_query=["low", "ellow", "wow"]"""
        words_container = ["hello", "yellow", "mellow", "fellow"]
        words_query = ["low", "ellow", "wow"]
        # "wow" still shares the suffix "ow" with "yellow", so it does not fall
        # back to "hello"
        expected = [1, 1, 1]
        actual = longest_common_suffix_queries(
            words_container=words_container, words_query=words_query
        )
        self.assertEqual(expected, actual)

    def test_4(self):
        """should return [3,3,0] for words_container=["cat", "start", "part", "art"] and words_query=["art", "rt", "xyz"]"""
        words_container = ["cat", "start", "part", "art"]
        words_query = ["art", "rt", "xyz"]
        # "xyz" shares no suffix, so the shortest word wins and "cat" precedes
        # the equally short "art"
        expected = [3, 3, 0]
        actual = longest_common_suffix_queries(
            words_container=words_container, words_query=words_query
        )
        self.assertEqual(expected, actual)

    def test_5(self):
        """should return [0,1,2] for words_container=["abcde", "bcde", "cde"] and words_query=["abcde", "bcde", "cde"]"""
        words_container = ["abcde", "bcde", "cde"]
        words_query = ["abcde", "bcde", "cde"]
        expected = [0, 1, 2]
        actual = longest_common_suffix_queries(
            words_container=words_container, words_query=words_query
        )
        self.assertEqual(expected, actual)

    def test_6(self):
        """should return [2,2,2] for words_container=["starting","sting","ring"] and words_query=["ring","ing","random"]"""
        words_container = ["starting", "sting", "ring"]
        words_query = ["ring", "ing", "random"]
        expected = [2, 2, 2]
        actual = longest_common_suffix_queries(
            words_container=words_container, words_query=words_query
        )
        self.assertEqual(expected, actual)

    def test_7(self):
        """should return [1,1,1] for words_container=["alpha","beta","gamma"] and words_query=["ta","eta","zeta"]"""
        words_container = ["alpha", "beta", "gamma"]
        words_query = ["ta", "eta", "zeta"]
        expected = [1, 1, 1]
        actual = longest_common_suffix_queries(
            words_container=words_container, words_query=words_query
        )
        self.assertEqual(expected, actual)

    def test_8(self):
        """should return [2,2,2] for words_container=["respect","aspect","spect"] and words_query=["spect","ect","detect"]"""
        words_container = ["respect", "aspect", "spect"]
        words_query = ["spect", "ect", "detect"]
        expected = [2, 2, 2]
        actual = longest_common_suffix_queries(
            words_container=words_container, words_query=words_query
        )
        self.assertEqual(expected, actual)

    def test_9(self):
        """should return [2,0,2] for words_container=["abcdefgh","poiuygh","ghghgh"] and words_query=["gh","acbfgh","acbfegh"]"""
        words_container = ["abcdefgh", "poiuygh", "ghghgh"]
        words_query = ["gh", "acbfgh", "acbfegh"]
        expected = [2, 0, 2]
        actual = longest_common_suffix_queries(
            words_container=words_container, words_query=words_query
        )
        self.assertEqual(expected, actual)


if __name__ == "__main__":
unittest.main()
Loading