Skip to content

Commit 8f7d65b

Browse files
authored
Merge pull request #139 from BrianLusina/feat/strings-shortest-common-supersequence
feat(algorithms, strings): shortest common supersequence
2 parents 599b271 + f366e87 commit 8f7d65b

File tree

11 files changed

+232
-6
lines changed

11 files changed

+232
-6
lines changed

DIRECTORY.md

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,8 @@
7373
* [Test Min Distance](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/dynamic_programming/min_distance/test_min_distance.py)
7474
* Min Path Sum
7575
* [Test Min Path Sum](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/dynamic_programming/min_path_sum/test_min_path_sum.py)
76+
* Shortest Common Supersequence
77+
* [Test Shortest Common Supersequence](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/dynamic_programming/shortest_common_supersequence/test_shortest_common_supersequence.py)
7678
* Unique Paths
7779
* [Test Unique Paths](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/dynamic_programming/unique_paths/test_unique_paths.py)
7880
* Word Break
@@ -182,8 +184,12 @@
182184
* Repeated Dna Sequences
183185
* [Test Repeated Dna Sequences](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/sliding_window/repeated_dna_sequences/test_repeated_dna_sequences.py)
184186
* Sorting
187+
* Heapsort
188+
* [Test Heap Sort](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/sorting/heapsort/test_heap_sort.py)
185189
* Insertionsort
186190
* [Test Insertion Sort](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/sorting/insertionsort/test_insertion_sort.py)
191+
* Mergesort
192+
* [Test Merge Sort](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/sorting/mergesort/test_merge_sort.py)
187193
* Quicksort
188194
* [Test Quicksort](https://github.com/BrianLusina/PythonSnips/blob/master/algorithms/sorting/quicksort/test_quicksort.py)
189195
* Selection
Lines changed: 26 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,26 @@
1+
# Shortest Common Supersequence
2+
3+
You are given two strings, `str1` and `str2`. Your task is to find their shortest common supersequence (SCS), i.e. the shortest
4+
possible string that contains both `str1` and `str2` as subsequences.
5+
6+
If multiple strings satisfy this condition, you may return any one of them.
7+
8+
> Note: A string s is considered a subsequence of another string t if s can be obtained by deleting zero or more
9+
> characters from t without changing the order of the remaining characters.
10+
11+
## Constraints
12+
13+
- 1 <= `str1.length`, `str2.length` <= 10^3
14+
- str1 and str2 consist of lowercase English letters.
15+
16+
## Examples
17+
18+
![Example 1](images/examples/shortest_common_supersequence_example_1.png)
19+
![Example 2](images/examples/shortest_common_supersequence_example_2.png)
20+
![Example 3](images/examples/shortest_common_supersequence_example_3.png)
21+
![Example 4](images/examples/shortest_common_supersequence_example_4.png)
22+
23+
## Related Topics
24+
25+
- String
26+
- Dynamic Programming
Lines changed: 64 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,64 @@
1+
def shortest_common_supersequence(str1: str, str2: str) -> str:
    """Return a shortest string that contains both inputs as subsequences.

    A table of supersequence lengths for every pair of prefixes is filled
    bottom-up, then walked backwards from the bottom-right cell to recover
    one optimal string. When several shortest supersequences exist, the one
    selected by that walk is returned.

    Args:
        str1: First input string.
        str2: Second input string.

    Returns:
        A string of minimal length that has ``str1`` and ``str2`` as
        subsequences.
    """
    rows, cols = len(str1), len(str2)

    # lengths[i][j] holds the SCS length of str1[:i] and str2[:j].
    # Row 0 (empty str1 prefix) needs j characters; column 0 (empty str2
    # prefix) needs i characters. Interior cells are filled in below.
    lengths = [list(range(cols + 1))]
    lengths.extend([i] + [0] * cols for i in range(1, rows + 1))

    for i, ch1 in enumerate(str1, start=1):
        above, current = lengths[i - 1], lengths[i]
        for j, ch2 in enumerate(str2, start=1):
            if ch1 == ch2:
                # A shared character is emitted once: extend the diagonal.
                current[j] = above[j - 1] + 1
            else:
                # Emit one of the two characters; keep the cheaper option.
                current[j] = 1 + min(above[j], current[j - 1])

    # Walk back through the table, collecting characters last-to-first.
    reversed_chars = []
    i, j = rows, cols
    while i and j:
        ch1, ch2 = str1[i - 1], str2[j - 1]
        if ch1 == ch2:
            reversed_chars.append(ch1)
            i -= 1
            j -= 1
        elif lengths[i - 1][j] < lengths[i][j - 1]:
            # Dropping str1's character leaves the strictly shorter remainder.
            reversed_chars.append(ch1)
            i -= 1
        else:
            # Ties and the remaining case consume str2's character.
            reversed_chars.append(ch2)
            j -= 1

    # One prefix may still be unconsumed; at most one of these is non-empty.
    reversed_chars.extend(reversed(str1[:i]))
    reversed_chars.extend(reversed(str2[:j]))

    return "".join(reversed(reversed_chars))
147 KB
Loading
133 KB
Loading
137 KB
Loading
134 KB
Loading
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
import unittest
2+
from parameterized import parameterized
3+
from algorithms.dynamic_programming.shortest_common_supersequence import (
4+
shortest_common_supersequence,
5+
)
6+
7+
# Each case is (str1, str2, one valid shortest common supersequence).
# Several optimal answers may exist, so the test compares lengths and checks
# the subsequence property instead of comparing against this exact string.
TEST_CASES = [
    ("apple", "plejuice", "applejuice"),
    ("educative", "educative", "educative"),
    ("ababa", "babab", "ababab"),
    ("race", "ecar", "racecar"),
    ("ab", "ac", "abc"),
    ("abcxyz", "axbycz", "axbcxycz"),
    ("abc", "ab", "abc"),
    ("aab", "azb", "aazb"),
    ("abac", "cab", "cabac"),
    ("aaaaaaaa", "aaaaaaaa", "aaaaaaaa"),
]


def _is_subsequence(candidate: str, text: str) -> bool:
    """Return True when ``candidate`` can be read left to right inside ``text``."""
    remaining = iter(text)
    # ``ch in remaining`` consumes the iterator up to and including the first
    # match, so every character must be found after the previous one.
    return all(ch in remaining for ch in candidate)


class ShortestCommonSuperSequenceTestCase(unittest.TestCase):
    """Checks that the result is a genuine, minimal-length supersequence."""

    @parameterized.expand(TEST_CASES)
    def test_shortest_common_supersequence(self, str1: str, str2: str, expected: str):
        actual = shortest_common_supersequence(str1, str2)

        # Comparing sorted characters would accept any string with the right
        # letters, even one that is not a supersequence; verify the real
        # contract instead: minimal length and both inputs embedded in order.
        self.assertEqual(len(expected), len(actual))
        self.assertTrue(
            _is_subsequence(str1, actual),
            f"{str1!r} is not a subsequence of {actual!r}",
        )
        self.assertTrue(
            _is_subsequence(str2, actual),
            f"{str2!r} is not a subsequence of {actual!r}",
        )
26+
27+
28+
if __name__ == "__main__":
29+
unittest.main()

algorithms/sorting/heapsort/__init__.py

Lines changed: 51 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1,12 +1,15 @@
1-
def left_child_index(parent_index):
1+
from typing import List, Any
2+
3+
4+
def left_child_index(parent_index: int) -> int:
    """Return the array index of the left child of the node at ``parent_index``."""
    return 2 * parent_index + 1
36

47

5-
def right_child_index(parent_index):
8+
def right_child_index(parent_index: int) -> int:
    """Return the array index of the right child of the node at ``parent_index``."""
    return 2 * parent_index + 2
710

811

9-
def bubble_down(heap, heap_length, index):
12+
def bubble_down(heap: List, heap_length: int, index: int):
1013
"""
1114
Restore a max heap where the value at index may be out of place
1215
"""
@@ -37,7 +40,7 @@ def bubble_down(heap, heap_length, index):
3740
break
3841

3942

40-
def remove_max(heap, heap_length):
43+
def remove_max(heap: List, heap_length: int):
4144
"""
4245
Remove and return the largest item from a heap
4346
Updates the heap in-place, maintaining validity
@@ -54,13 +57,37 @@ def remove_max(heap, heap_length):
5457
return max_value
5558

5659

57-
def heapify(the_list):
60+
def heapify(the_list: List[Any]):
    """Rearrange ``the_list`` in place by bubbling down every position.

    Positions are visited from the last index back to the root, and each one
    is handed to ``bubble_down`` together with the full list length.
    """
    # Walk from the final element towards index 0.
    for position in reversed(range(len(the_list))):
        bubble_down(the_list, len(the_list), position)
6164

6265

63-
def heapsort(the_list: list):
66+
def heapify_2(the_list: List[Any], n: int, i: int):
    """Sift the value at index ``i`` down until its subtree is a max heap.

    Only the first ``n`` entries of ``the_list`` are treated as part of the
    heap. The list is modified in place and nothing is returned.

    Args:
        the_list: Array-backed binary heap.
        n: Number of leading entries that belong to the heap.
        i: Index of the node whose value may be smaller than its children.
    """
    node = i
    while True:
        biggest = node

        # Compare against the left child first, then the right, so that the
        # right child only wins when it is strictly greater than both.
        for child in (2 * node + 1, 2 * node + 2):
            if child < n and the_list[child] > the_list[biggest]:
                biggest = child

        if biggest == node:
            # The node already dominates its children; the subtree is valid.
            return

        the_list[node], the_list[biggest] = the_list[biggest], the_list[node]
        # Continue sifting from the child position that received the value.
        node = biggest
88+
89+
90+
def heapsort(the_list: List[Any]):
6491
heapify(the_list)
6592

6693
heap_size = len(the_list)
@@ -73,3 +100,21 @@ def heapsort(the_list: list):
73100
# store the removed value at the end of the list,
74101
# after the entries used by the heap
75102
the_list[heap_size] = largest_size
103+
104+
105+
def heapsort_2(the_list: List[Any]) -> List[Any]:
    """Sort ``the_list`` in ascending order using heap sort.

    The list is sorted in place and the same list object is also returned.

    Args:
        the_list: Items to sort; they must be comparable with ``>``.

    Returns:
        ``the_list`` itself, now in ascending order.
    """
    size = len(the_list)

    # Phase 1: build a max heap by sifting down every non-leaf node, starting
    # with the last one and finishing at the root.
    for parent in reversed(range(size // 2)):
        heapify_2(the_list, size, parent)

    # Phase 2: repeatedly move the current maximum (the root) behind the
    # shrinking heap, then repair the heap over the remaining prefix.
    for end in reversed(range(1, size)):
        the_list[0], the_list[end] = the_list[end], the_list[0]
        heapify_2(the_list, end, 0)

    return the_list
Lines changed: 28 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,28 @@
1+
import unittest
2+
from typing import List
3+
from parameterized import parameterized
4+
from algorithms.sorting.heapsort import heapsort, heapsort_2
5+
6+
TEST_CASES = [
7+
([5, 2, 3, 1], [1, 2, 3, 5]),
8+
([9, -3, 5, 0, -10, 8], [-10, -3, 0, 5, 8, 9]),
9+
([2, 2, 1, 3, 1], [1, 1, 2, 2, 3]),
10+
([4, -1, -1, 2, -2, 0], [-2, -1, -1, 0, 2, 4]),
11+
([0, 0, 0, 0], [0, 0, 0, 0]),
12+
]
13+
14+
15+
class HeapSortTestCase(unittest.TestCase):
    """Exercises both heap sort implementations against the shared cases.

    Both sorts mutate their argument, and the parameterized cases hand the
    same list objects to every test. Each test therefore sorts a private copy
    so that no test receives input already sorted by another one.
    """

    @parameterized.expand(TEST_CASES)
    def test_heap_sort(self, nums: List[int], expected: List[int]):
        # Copy so the shared TEST_CASES entry stays unsorted for other tests.
        data = list(nums)
        heapsort(data)
        self.assertEqual(expected, data)

    @parameterized.expand(TEST_CASES)
    def test_heap_sort_2(self, nums: List[int], expected: List[int]):
        # Copy so the shared TEST_CASES entry stays unsorted for other tests.
        data = list(nums)
        actual = heapsort_2(data)
        self.assertEqual(expected, actual)
25+
26+
27+
if __name__ == "__main__":
28+
unittest.main()

0 commit comments

Comments
 (0)