diff --git a/DIRECTORY.md b/DIRECTORY.md
index e0d6b823..f325034e 100644
--- a/DIRECTORY.md
+++ b/DIRECTORY.md
@@ -313,6 +313,10 @@
       * [Node](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/ternary/node.py)
       * [Test Ternary Tree Paths](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/ternary/test_ternary_tree_paths.py)
     * Trie
+      * Suffix
+        * [Suffix Tree](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/suffix/suffix_tree.py)
+        * [Suffix Tree Node](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/suffix/suffix_tree_node.py)
+        * [Types](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/suffix/types.py)
       * [Trie](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/trie.py)
       * [Trie Node](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/trie_node.py)
   * Tuples
@@ -719,6 +723,8 @@
     * [Test Is Unique](https://github.com/BrianLusina/PythonSnips/blob/master/pystrings/is_unique/test_is_unique.py)
   * Issubsequence
     * [Test Is Subsequence](https://github.com/BrianLusina/PythonSnips/blob/master/pystrings/issubsequence/test_is_subsequence.py)
+  * Longest Common Suffix Queries
+    * [Test Longest Common Suffix Queries](https://github.com/BrianLusina/PythonSnips/blob/master/pystrings/longest_common_suffix_queries/test_longest_common_suffix_queries.py)
   * Longest Self Contained Substring
     * [Test Longest Self Contained Substring](https://github.com/BrianLusina/PythonSnips/blob/master/pystrings/longest_self_contained_substring/test_longest_self_contained_substring.py)
   * Look And Say Sequence
diff --git a/datastructures/trees/binary/test_utils.py b/datastructures/trees/binary/test_utils.py
index ed9e7fdc..1bdc47cc 100644
--- a/datastructures/trees/binary/test_utils.py
+++ b/datastructures/trees/binary/test_utils.py
@@ -8,6 +8,20 @@
 
 class LowestCommonAncestorTestCase(unittest.TestCase):
     def test_1(self):
+        r"""
+        Test to ensure that the lowest common ancestor function returns the correct node for a given binary tree.
+
+        The binary tree is structured as follows:
+                 10
+               /    \
+             11      22
+            /  \    /  \
+           6    5  19   14
+               / \
+              13  15
+
+        The function should return the node with value 5, given the nodes with values 13 and 15.
+        """
         root = BinaryTreeNode(data=10)
 
         # left subtree
diff --git a/datastructures/trees/trie/__init__.py b/datastructures/trees/trie/__init__.py
index 2d180273..346e45bd 100644
--- a/datastructures/trees/trie/__init__.py
+++ b/datastructures/trees/trie/__init__.py
@@ -1,5 +1,6 @@
 from datastructures.trees.trie.trie_node import TrieNode
 from datastructures.trees.trie.trie import Trie
+from datastructures.trees.trie.suffix.suffix_tree_node import SuffixTreeNode
+from datastructures.trees.trie.suffix.suffix_tree import SuffixTree
 
-
-__all__ = ["Trie", "TrieNode"]
+__all__ = ["Trie", "TrieNode", "SuffixTreeNode", "SuffixTree"]
diff --git a/datastructures/trees/trie/suffix/__init__.py b/datastructures/trees/trie/suffix/__init__.py
new file mode 100644
index 00000000..271bf50a
--- /dev/null
+++ b/datastructures/trees/trie/suffix/__init__.py
@@ -0,0 +1,7 @@
+from datastructures.trees.trie.suffix.suffix_tree_node import SuffixTreeNode
+from datastructures.trees.trie.suffix.suffix_tree import SuffixTree
+
+__all__ = [
+    "SuffixTree",
+    "SuffixTreeNode",
+]
diff --git a/datastructures/trees/trie/suffix/suffix_tree.py b/datastructures/trees/trie/suffix/suffix_tree.py
new file mode 100644
index 00000000..659e696c
--- /dev/null
+++ b/datastructures/trees/trie/suffix/suffix_tree.py
@@ -0,0 +1,70 @@
+from datastructures.trees.trie.suffix.suffix_tree_node import SuffixTreeNode
+from datastructures.trees.trie.suffix.types import WordInfo
+
+
+class SuffixTree:
+    """
+    A Trie that matches on suffixes of words instead of prefixes by storing every
+    word reversed, so a shared suffix becomes a shared path from the root.
+
+    Each node caches the best candidate word (shortest, then earliest) among the
+    words passing through it, which is what resolves ties at query time.
+    """
+
+    def __init__(self):
+        self.root = SuffixTreeNode()
+
+    @staticmethod
+    def _update_best_info(current_info: WordInfo, new_info: WordInfo) -> WordInfo:
+        """
+        Applies the tie-breaking rules to select the better WordInfo.
+
+        Rules: 1. Smallest length wins. 2. Earliest index wins if lengths are equal.
+        """
+        # WordInfo is (length, index), so tuple ordering encodes both rules;
+        # min() keeps current_info when the two are equal.
+        return min(current_info, new_info)
+
+    def insert(self, word: str, original_index: int) -> None:
+        """
+        Inserts a reversed word and updates best_info along the path.
+
+        :param word: word to insert
+        :param original_index: index of the word in the original container
+        """
+        # The length of the original word is the primary sorting key
+        new_info: WordInfo = (len(word), original_index)
+
+        # Update the root's best_info first, as every word passes through it
+        node = self.root
+        node.best_info = self._update_best_info(node.best_info, new_info)
+
+        # Walk the word back to front, i.e. insert the *reversed* word
+        for char in reversed(word):
+            if char not in node.children:
+                node.children[char] = SuffixTreeNode()
+            node = node.children[char]
+            node.best_info = self._update_best_info(node.best_info, new_info)
+
+    def search_best_index(self, query_word: str) -> int:
+        """
+        Finds the index of the best match for the query word.
+
+        The best match is cached on the node that represents the longest common
+        *prefix* of the reversed query and any reversed container word.
+
+        :param query_word: word to match against the inserted words
+        :return: original index of the best match, or -1 if the tree is empty
+        """
+        # Starting at the root covers the case where the longest common suffix
+        # is the empty string: the root caches the best word overall.
+        node = self.root
+
+        for char in reversed(query_word):
+            # Membership test rather than indexing, so a miss never creates a
+            # node in the defaultdict-backed children mapping.
+            if char not in node.children:
+                break
+            node = node.children[char]
+
+        return node.best_index
diff --git a/datastructures/trees/trie/suffix/suffix_tree_node.py b/datastructures/trees/trie/suffix/suffix_tree_node.py
new file mode 100644
index 00000000..1989b796
--- /dev/null
+++ b/datastructures/trees/trie/suffix/suffix_tree_node.py
@@ -0,0 +1,26 @@
+from typing import DefaultDict, Tuple
+from collections import defaultdict
+from datastructures.trees.trie.trie_node import TrieNode
+from datastructures.trees.trie.suffix.types import WordInfo, INF_WORD_INFO
+
+
+class SuffixTreeNode(TrieNode):
+    """
+    This represents a node in our Suffix Trie structure.
+    Each node stores its children and the best word info (shortest, earliest)
+    for the words that pass through this node.
+    """
+
+    def __init__(self):
+        super().__init__()
+        self.children: DefaultDict[str, "SuffixTreeNode"] = defaultdict(SuffixTreeNode)
+        # Stores the best WordInfo (length, index) for any word that passes
+        # through or ends at this node. Holds the sentinel until a word arrives.
+        self.best_info: WordInfo = INF_WORD_INFO
+
+    @property
+    def best_index(self) -> int:
+        """Index of the best word passing through this node, or -1 if none has."""
+        if self.best_info == INF_WORD_INFO:
+            return -1
+        return self.best_info[1]
diff --git a/datastructures/trees/trie/suffix/types.py b/datastructures/trees/trie/suffix/types.py
new file mode 100644
index 00000000..a93c1478
--- /dev/null
+++ b/datastructures/trees/trie/suffix/types.py
@@ -0,0 +1,8 @@
+import sys
+from typing import DefaultDict, Tuple
+
+# Type alias for the best word info: (length, original_index)
+WordInfo = Tuple[int, int]
+# Sentinel larger than any real (length, index) pair so the first inserted word
+# always wins; built from ints so that it really is a WordInfo.
+INF_WORD_INFO: WordInfo = (sys.maxsize, sys.maxsize)
diff --git a/pystrings/longest_common_suffix_queries/README.md b/pystrings/longest_common_suffix_queries/README.md
new file mode 100644
index 00000000..3706033c
--- /dev/null
+++ b/pystrings/longest_common_suffix_queries/README.md
@@ -0,0 +1,27 @@
+# Longest Common Suffix Queries
+
+You are given two arrays of strings, wordsContainer and wordsQuery.
+For each string wordsQuery[i], find the string in wordsContainer that shares the longest common suffix with it.
+- If multiple strings in wordsContainer share the same longest suffix, choose the one with the smallest length.
+- If two or more such strings have the same smallest length, choose the string that appears earliest in wordsContainer.
+
+Return an array of integers ans, where ans[i] is the index of the chosen string in wordsContainer for the query
+wordsQuery[i].
+
+Constraints
+
+- 1 ≤ wordsContainer.length, wordsQuery.length ≤ 10^4
+- 1 ≤ wordsContainer[i].length ≤ 5 * 10^3
+- 1 ≤ wordsQuery[i].length ≤ 5 * 10^3
+- wordsContainer[i] consists only of lowercase English letters.
+- wordsQuery[i] consists only of lowercase English letters.
+- The sum of wordsContainer[i].length is at most 5 * 10^5
+- The sum of wordsQuery[i].length is at most 5 * 10^5
+
+## Examples
+
+![Example 1](./images/examples/longest_common_suffix_queries_1.png)
+![Example 2](./images/examples/longest_common_suffix_queries_2.png)
+![Example 3](./images/examples/longest_common_suffix_queries_3.png)
+![Example 4](./images/examples/longest_common_suffix_queries_4.png)
+![Example 5](./images/examples/longest_common_suffix_queries_5.png)
diff --git a/pystrings/longest_common_suffix_queries/__init__.py b/pystrings/longest_common_suffix_queries/__init__.py
new file mode 100644
index 00000000..2e8da5e6
--- /dev/null
+++ b/pystrings/longest_common_suffix_queries/__init__.py
@@ -0,0 +1,24 @@
+from typing import List
+from datastructures.trees.trie import SuffixTree
+
+
+def longest_common_suffix_queries(
+    words_container: List[str], words_query: List[str]
+) -> List[int]:
+    """
+    Returns, for each query, the index of the container word that shares the
+    longest common suffix with it (ties: shortest word, then earliest index).
+
+    :param words_container: candidate words
+    :param words_query: words to look up
+    :return: one index into words_container per query, or -1 per query when
+        words_container is empty
+    """
+    trie = SuffixTree()
+
+    # 1. Build the Trie from words_container
+    for index, word in enumerate(words_container):
+        trie.insert(word, index)
+
+    # 2. Each query resolves to the best index cached at its deepest matching node
+    return [trie.search_best_index(query_word) for query_word in words_query]
diff --git a/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_1.png b/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_1.png
new file mode 100644
index 00000000..535be2c9
Binary files /dev/null and b/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_1.png differ
diff --git a/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_2.png b/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_2.png
new file mode 100644
index 00000000..1166c87c
Binary files /dev/null and b/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_2.png differ
diff --git a/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_3.png b/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_3.png
new file mode 100644
index 00000000..e90d8e33
Binary files /dev/null and b/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_3.png differ
diff --git a/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_4.png b/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_4.png
new file mode 100644
index 00000000..61c9fea1
Binary files /dev/null and b/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_4.png differ
diff --git a/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_5.png b/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_5.png
new file mode 100644
index 00000000..4dffc41d
Binary files /dev/null and
b/pystrings/longest_common_suffix_queries/images/examples/longest_common_suffix_queries_5.png differ
diff --git a/pystrings/longest_common_suffix_queries/test_longest_common_suffix_queries.py b/pystrings/longest_common_suffix_queries/test_longest_common_suffix_queries.py
new file mode 100644
index 00000000..83935d1f
--- /dev/null
+++ b/pystrings/longest_common_suffix_queries/test_longest_common_suffix_queries.py
@@ -0,0 +1,98 @@
+import unittest
+from . import longest_common_suffix_queries
+
+
+class LongestCommonSuffixQueriesTestCase(unittest.TestCase):
+    def test_1(self):
+        """should return [1,1,1] for words_container=["mango","ango","xango"] and words_query=["go","ango","xyz"]"""
+        words_container = ["mango", "ango", "xango"]
+        words_query = ["go", "ango", "xyz"]
+        expected = [1, 1, 1]
+        actual = longest_common_suffix_queries(
+            words_container=words_container, words_query=words_query
+        )
+        self.assertEqual(expected, actual)
+
+    def test_2(self):
+        """should return [1,1,1] for words_container=["flight", "night", "tight", "light"] and words_query=["ight","t","zzz"]"""
+        words_container = ["flight", "night", "tight", "light"]
+        words_query = ["ight", "t", "zzz"]
+        expected = [1, 1, 1]
+        actual = longest_common_suffix_queries(
+            words_container=words_container, words_query=words_query
+        )
+        self.assertEqual(expected, actual)
+
+    def test_3(self):
+        """should return [1,1,1] for words_container=["hello", "yellow", "mellow", "fellow"] and words_query=["low", "ellow", "wow"]"""
+        words_container = ["hello", "yellow", "mellow", "fellow"]
+        words_query = ["low", "ellow", "wow"]
+        expected = [1, 1, 1]
+        actual = longest_common_suffix_queries(
+            words_container=words_container, words_query=words_query
+        )
+        self.assertEqual(expected, actual)
+
+    def test_4(self):
+        """should return [3,3,0] for words_container=["cat", "start", "part", "art"] and words_query=["art", "rt", "xyz"]"""
+        words_container = ["cat", "start", "part", "art"]
+        words_query = ["art", "rt", "xyz"]
+        expected = [3, 3, 0]
+        actual = longest_common_suffix_queries(
+            words_container=words_container, words_query=words_query
+        )
+        self.assertEqual(expected, actual)
+
+    def test_5(self):
+        """should return [0,1,2] for words_container=["abcde", "bcde", "cde"] and words_query=["abcde", "bcde", "cde"]"""
+        words_container = ["abcde", "bcde", "cde"]
+        words_query = ["abcde", "bcde", "cde"]
+        expected = [0, 1, 2]
+        actual = longest_common_suffix_queries(
+            words_container=words_container, words_query=words_query
+        )
+        self.assertEqual(expected, actual)
+
+    def test_6(self):
+        """should return [2,2,2] for words_container=["starting","sting","ring"] and words_query=["ring","ing","random"]"""
+        words_container = ["starting", "sting", "ring"]
+        words_query = ["ring", "ing", "random"]
+        expected = [2, 2, 2]
+        actual = longest_common_suffix_queries(
+            words_container=words_container, words_query=words_query
+        )
+        self.assertEqual(expected, actual)
+
+    def test_7(self):
+        """should return [1,1,1] for words_container=["alpha","beta","gamma"] and words_query=["ta","eta","zeta"]"""
+        words_container = ["alpha", "beta", "gamma"]
+        words_query = ["ta", "eta", "zeta"]
+        expected = [1, 1, 1]
+        actual = longest_common_suffix_queries(
+            words_container=words_container, words_query=words_query
+        )
+        self.assertEqual(expected, actual)
+
+    def test_8(self):
+        """should return [2,2,2] for words_container=["respect","aspect","spect"] and words_query=["spect","ect","detect"]"""
+        words_container = ["respect", "aspect", "spect"]
+        words_query = ["spect", "ect", "detect"]
+        expected = [2, 2, 2]
+        actual = longest_common_suffix_queries(
+            words_container=words_container, words_query=words_query
+        )
+        self.assertEqual(expected, actual)
+
+    def test_9(self):
+        """should return [2,0,2] for words_container=["abcdefgh","poiuygh","ghghgh"] and words_query=["gh","acbfgh","acbfegh"]"""
+        words_container = ["abcdefgh", "poiuygh", "ghghgh"]
+        words_query = ["gh", "acbfgh", "acbfegh"]
+        expected = [2, 0, 2]
+        actual = longest_common_suffix_queries(
+            words_container=words_container, words_query=words_query
+        )
+        self.assertEqual(expected, actual)
+
+
+if __name__ == "__main__":
+    unittest.main()