diff --git a/DIRECTORY.md b/DIRECTORY.md index 1a39d050..528220a5 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -265,6 +265,9 @@ * Minstack * [Test Min Stack](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/stacks/minstack/test_min_stack.py) * [Test Stacks](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/stacks/test_stacks.py) + * Streams + * Stream Checker + * [Test Stream Checker](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/streams/stream_checker/test_stream_checker.py) * Timemap * [Test Timemap](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/timemap/test_timemap.py) * Trees @@ -303,6 +306,9 @@ * Ternary * [Node](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/ternary/node.py) * [Test Ternary Tree Paths](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/ternary/test_ternary_tree_paths.py) + * Trie + * [Trie](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/trie.py) + * [Trie Node](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/trees/trie/trie_node.py) * Tuples * [Named Tuples](https://github.com/BrianLusina/PythonSnips/blob/master/datastructures/tuples/named_tuples.py) diff --git a/datastructures/streams/__init__.py b/datastructures/streams/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/datastructures/streams/stream_checker/README.md b/datastructures/streams/stream_checker/README.md new file mode 100644 index 00000000..060c9090 --- /dev/null +++ b/datastructures/streams/stream_checker/README.md @@ -0,0 +1,30 @@ +# Stream of Characters + +Design a data structure that processes a stream of characters and, after each character is received, determines if a +suffix of these characters is a string in a given array of strings words. 
class StreamChecker:
    """
    Checks, after every incoming character, whether some suffix of the stream equals one of the given words.

    The words are stored reversed in a trie. Reading the stream backwards (newest character first) while
    walking the trie from the root therefore matches suffixes of the stream against whole words.

    Example: with words = ["cat"] and the letters 'd', 'c', 'a', 't' queried in that order, the first three
    queries return False and the last one returns True, because "cat" is a suffix of "dcat".
    """

    def __init__(self, words: List[str]):
        """
        Initializes a StreamChecker instance.

        Constructor Time: O(Ltotal), where Ltotal is the sum of the lengths of all words. This is a one-time cost.

        Parameters:
            words (List[str]): List of words to be checked in the stream.
        """
        self.words = words
        self.trie = TrieNode()
        self.max_len = 0
        self.__build_trie()
        # A match can never be longer than the longest word, so older characters are dropped automatically.
        # With no words at all max_len stays 0 and the deque simply stays empty.
        self.stream: Deque[str] = deque(maxlen=self.max_len)

    def __build_trie(self):
        """
        Inserts every word into the trie back to front and records the length of the longest word.
        """
        # The order in which words are inserted does not affect the resulting trie, so no copy is needed.
        for word in self.words:
            # 1. track max length for the bounded stream history
            self.max_len = max(self.max_len, len(word))

            current = self.trie
            # 2. insert characters in reverse order; subscripting children creates a missing node on demand
            for letter in reversed(word):
                current = current.children[letter]

            # 3. mark the end of the reversed word
            current.is_end = True

    def query(self, letter: str) -> bool:
        """
        Appends a letter to the stream and reports whether any suffix of the stream matches a word.

        Query Time: O(Lmax), where Lmax is the length of the longest word. The stream history is capped at Lmax
        characters, so the cost of a query does not grow with the number of letters received so far.

        Parameters:
            letter (str): The next letter in the stream.

        Returns:
            bool: True if a suffix of the stream ending at this letter is one of the words, False otherwise.
        """
        self.stream.append(letter)
        current = self.trie

        # Iterate stream in reverse (newest character first)
        for character in reversed(self.stream):
            # get() never inserts a node, so a failed lookup leaves the trie untouched
            following = current.children.get(character)
            if following is None:
                # Dead end: no word ends with the characters read so far
                return False

            current = following

            # A reversed word ends here, i.e. the characters read so far spell a word
            if current.is_end:
                return True

        # The stored history was exhausted without completing any word
        return False
class StreamCheckerTestCase(unittest.TestCase):
    """Feeds letters into a StreamChecker one at a time and verifies the answer to every query."""

    def _run_steps(self, words, steps):
        """Build a checker for `words` and assert the outcome of each (letter, expected) pair in order."""
        checker = StreamChecker(words)
        for letter, expected in steps:
            outcome = checker.query(letter)
            if expected:
                self.assertTrue(outcome)
            else:
                self.assertFalse(outcome)

    def test_1(self):
        self._run_steps(
            ["go", "hi"],
            [
                ("h", False),
                ("i", True),
                ("g", False),
                ("o", True),
                ("x", False),
                ("y", False),
            ],
        )

    def test_2(self):
        self._run_steps(
            ["no", "yes"],
            [
                ("y", False),
                ("e", False),
                ("s", True),
                ("n", False),
                ("o", True),
            ],
        )

    def test_3(self):
        self._run_steps(
            ["a", "aa"],
            [
                ("a", True),
                ("a", True),
                ("a", True),
                ("b", False),
            ],
        )


if __name__ == '__main__':
    unittest.main()
class TrieNode:
    """
    A single node of a trie.

    The node does not store its own character: the character is the key under which the node is registered
    in its parent's children mapping.
    """

    def __init__(self):
        """
        Initializes a TrieNode instance with no children and the end-of-word flag cleared.
        """
        # defaultdict: subscripting a missing key creates the child node, so read-only lookups must use
        # `in` or `get` to avoid growing the trie by accident.
        self.children: DefaultDict[str, "TrieNode"] = defaultdict(TrieNode)
        # True when the path from the root to this node spells a complete inserted word
        self.is_end = False

    def __repr__(self):
        return f"TrieNode({self.children.items()}, {self.is_end})"


class Trie:
    """Prefix tree over strings supporting insertion, prefix completion and prefix existence checks."""

    def __init__(self):
        self.root = TrieNode()

    def insert(self, word: str) -> None:
        """
        Adds a word to the trie.

        Parameters:
            word (str): The word to insert. An empty word marks the root itself as the end of a word.
        """
        curr = self.root

        for char in word:
            # children creates the node on first access
            curr = curr.children[char]

        curr.is_end = True

    def search(self, word: str) -> List[str]:
        """
        Returns every inserted word that starts with the given word (the word itself included if present).

        Results are listed in depth-first pre-order, children being visited in the order they were inserted.

        Parameters:
            word (str): The prefix to complete.

        Returns:
            List[str]: Matching words, or an empty list if the prefix is empty or not in the trie.
        """
        if not word:
            return []

        curr = self.root
        for char in word:
            # membership test instead of subscripting, so a failed search does not create nodes
            if char not in curr.children:
                return []
            curr = curr.children[char]

        output = []
        # Each stack entry carries the node together with the full string spelled on the way to it; nodes do
        # not know their own character. An explicit stack keeps long words clear of the recursion limit.
        pending = [(curr, word)]
        while pending:
            node, text = pending.pop()
            if node.is_end:
                output.append(text)

            # push in reverse so that the first inserted child is popped (visited) first
            for char, child in reversed(list(node.children.items())):
                pending.append((child, text + char))

        return output

    def starts_with(self, prefix: str) -> bool:
        """
        Returns true if the given prefix is a prefix of any word in the trie.
        """
        curr = self.root

        for char in prefix:
            if char not in curr.children:
                return False
            curr = curr.children[char]

        return True