diff --git a/DIRECTORY.md b/DIRECTORY.md index d915c026..b2de89db 100644 --- a/DIRECTORY.md +++ b/DIRECTORY.md @@ -73,6 +73,8 @@ * [Test Min Distance](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/dynamic_programming/min_distance/test_min_distance.py) * Min Path Sum * [Test Min Path Sum](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/dynamic_programming/min_path_sum/test_min_path_sum.py) + * Shortest Common Supersequence + * [Test Shortest Common Supersequence](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/dynamic_programming/shortest_common_supersequence/test_shortest_common_supersequence.py) * Unique Paths * [Test Unique Paths](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/dynamic_programming/unique_paths/test_unique_paths.py) * Word Break @@ -182,8 +184,12 @@ * Repeated Dna Sequences * [Test Repeated Dna Sequences](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/sliding_window/repeated_dna_sequences/test_repeated_dna_sequences.py) * Sorting + * Heapsort + * [Test Heap Sort](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/sorting/heapsort/test_heap_sort.py) * Insertionsort * [Test Insertion Sort](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/sorting/insertionsort/test_insertion_sort.py) + * Mergesort + * [Test Merge Sort](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/sorting/mergesort/test_merge_sort.py) * Quicksort * [Test Quicksort](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/sorting/quicksort/test_quicksort.py) * Selection diff --git a/algorithms/dynamic_programming/shortest_common_supersequence/README.md b/algorithms/dynamic_programming/shortest_common_supersequence/README.md new file mode 100644 index 00000000..07cfac07 --- /dev/null +++ b/algorithms/dynamic_programming/shortest_common_supersequence/README.md @@ -0,0 +1,26 @@ +# Shortest Common Supersequence + +You are given two 
def shortest_common_supersequence(str1: str, str2: str) -> str:
    """Return a shortest string that contains both inputs as subsequences.

    A bottom-up table of shortest-common-supersequence (SCS) lengths is built
    for every pair of prefixes, then walked backwards from the bottom-right
    cell to reconstruct one optimal supersequence. When several optimal
    answers exist, ties are resolved in favour of consuming a character of
    ``str2`` first during the backward walk.

    Args:
        str1: First input string (may be empty).
        str2: Second input string (may be empty).

    Returns:
        A string of minimum length that has both ``str1`` and ``str2`` as
        subsequences.

    Complexity:
        O(len(str1) * len(str2)) time and space, for the nested fill loops and
        the full table kept for reconstruction.
    """
    str1_length = len(str1)
    str2_length = len(str2)

    # dp[row][col] is the SCS length for str1[:row] and str2[:col].
    dp = [[0] * (str2_length + 1) for _ in range(str1_length + 1)]

    # First row: str1 prefix is empty, so the SCS is the str2 prefix itself.
    dp[0] = list(range(str2_length + 1))
    # First column: str2 prefix is empty, so the SCS is the str1 prefix itself.
    for row in range(1, str1_length + 1):
        dp[row][0] = row

    for row in range(1, str1_length + 1):
        for col in range(1, str2_length + 1):
            if str1[row - 1] == str2[col - 1]:
                # A shared character is emitted once for both strings.
                dp[row][col] = dp[row - 1][col - 1] + 1
            else:
                # Emit one extra character on top of the cheaper sub-problem.
                dp[row][col] = min(dp[row - 1][col], dp[row][col - 1]) + 1

    # Walk back from the bottom-right cell, collecting characters in reverse.
    reversed_chars = []
    row, col = str1_length, str2_length
    while row > 0 and col > 0:
        if str1[row - 1] == str2[col - 1]:
            reversed_chars.append(str1[row - 1])
            row -= 1
            col -= 1
        elif dp[row - 1][col] < dp[row][col - 1]:
            # Dropping str1's character leads to the shorter sub-problem.
            reversed_chars.append(str1[row - 1])
            row -= 1
        else:
            # Otherwise (including ties) consume str2's character.
            reversed_chars.append(str2[col - 1])
            col -= 1

    # The loop stops once either index reaches 0, so at most one of the two
    # slices below is non-empty; it is the untouched prefix that must lead
    # the result.
    return str1[:row] + str2[:col] + "".join(reversed(reversed_chars))
a/algorithms/dynamic_programming/shortest_common_supersequence/images/examples/shortest_common_supersequence_example_1.png b/algorithms/dynamic_programming/shortest_common_supersequence/images/examples/shortest_common_supersequence_example_1.png new file mode 100644 index 00000000..1d82243b Binary files /dev/null and b/algorithms/dynamic_programming/shortest_common_supersequence/images/examples/shortest_common_supersequence_example_1.png differ diff --git a/algorithms/dynamic_programming/shortest_common_supersequence/images/examples/shortest_common_supersequence_example_2.png b/algorithms/dynamic_programming/shortest_common_supersequence/images/examples/shortest_common_supersequence_example_2.png new file mode 100644 index 00000000..c6c9d1b4 Binary files /dev/null and b/algorithms/dynamic_programming/shortest_common_supersequence/images/examples/shortest_common_supersequence_example_2.png differ diff --git a/algorithms/dynamic_programming/shortest_common_supersequence/images/examples/shortest_common_supersequence_example_3.png b/algorithms/dynamic_programming/shortest_common_supersequence/images/examples/shortest_common_supersequence_example_3.png new file mode 100644 index 00000000..905b7595 Binary files /dev/null and b/algorithms/dynamic_programming/shortest_common_supersequence/images/examples/shortest_common_supersequence_example_3.png differ diff --git a/algorithms/dynamic_programming/shortest_common_supersequence/images/examples/shortest_common_supersequence_example_4.png b/algorithms/dynamic_programming/shortest_common_supersequence/images/examples/shortest_common_supersequence_example_4.png new file mode 100644 index 00000000..0613fdb6 Binary files /dev/null and b/algorithms/dynamic_programming/shortest_common_supersequence/images/examples/shortest_common_supersequence_example_4.png differ diff --git a/algorithms/dynamic_programming/shortest_common_supersequence/test_shortest_common_supersequence.py 
class ShortestCommonSuperSequenceTestCase(unittest.TestCase):
    """Checks that the result is a valid supersequence of optimal length.

    Several shortest common supersequences can exist for one input pair, so
    the returned string is not compared against a fixed answer. The
    ``expected`` value of each case is used only for its length; the result
    must additionally contain both inputs as subsequences. Comparing sorted
    characters (the previous check) accepted strings that are not
    supersequences at all and could reject valid ones.
    """

    @staticmethod
    def _is_subsequence(candidate: str, text: str) -> bool:
        """Return True if ``candidate`` can be obtained by deleting chars from ``text``."""
        remaining = iter(text)
        # ``char in remaining`` advances the iterator, so order is enforced.
        return all(char in remaining for char in candidate)

    @parameterized.expand(TEST_CASES)
    def test_shortest_common_supersequence(self, str1: str, str2: str, expected: str):
        actual = shortest_common_supersequence(str1, str2)

        self.assertEqual(
            len(expected),
            len(actual),
            f"{actual!r} does not have the optimal length {len(expected)}",
        )
        self.assertTrue(
            self._is_subsequence(str1, actual),
            f"{str1!r} is not a subsequence of {actual!r}",
        )
        self.assertTrue(
            self._is_subsequence(str2, actual),
            f"{str2!r} is not a subsequence of {actual!r}",
        )
def heapify_2(the_list: List[Any], n: int, i: int) -> None:
    """Sift the value at index ``i`` down until its subtree is a max heap.

    Only the first ``n`` entries of ``the_list`` are treated as the heap;
    anything at index ``n`` or beyond is left untouched. The list is modified
    in place.

    Args:
        the_list: Array-backed binary heap (children of ``k`` are at
            ``2k + 1`` and ``2k + 2``).
        n: Number of leading elements that belong to the heap.
        i: Index of the node that may be smaller than its children.
    """
    # Iterative form of the sift-down: Python has no tail-call elimination,
    # so a loop avoids one stack frame per tree level.
    while True:
        largest = i
        left = 2 * i + 1
        right = 2 * i + 2

        # Pick the largest among the node and whichever children exist.
        if left < n and the_list[left] > the_list[largest]:
            largest = left
        if right < n and the_list[right] > the_list[largest]:
            largest = right

        if largest == i:
            # Node already dominates its children: heap property restored.
            return

        the_list[i], the_list[largest] = the_list[largest], the_list[i]
        # Continue with the child slot that received the smaller value.
        i = largest
class HeapSortTestCase(unittest.TestCase):
    """Exercises both heap sort implementations against the same fixtures.

    Both implementations sort their argument in place, and the lists inside
    ``TEST_CASES`` are shared by every parameterized test. Each test therefore
    sorts a copy, so that no test receives input already sorted by another.
    """

    @parameterized.expand(TEST_CASES)
    def test_heap_sort(self, nums: List[int], expected: List[int]):
        # Copy first: sorting the shared fixture would leak into other tests.
        data = list(nums)
        heapsort(data)
        self.assertEqual(expected, data)

    @parameterized.expand(TEST_CASES)
    def test_heap_sort_2(self, nums: List[int], expected: List[int]):
        # Copy first: sorting the shared fixture would leak into other tests.
        data = list(nums)
        actual = heapsort_2(data)
        self.assertEqual(expected, actual)
test_merge_sort_out_of_place(self, nums: List[int], expected: List[int]): + actual = merge_sort_out_of_place(nums) + self.assertEqual(expected, actual) + + +if __name__ == "__main__": + unittest.main()