Skip to content

Commit 3bbb69d

Browse files
committed
feat(strings, backtracking): remove invalid parenthesis
1 parent 01725b5 commit 3bbb69d

File tree

14 files changed

+304
-13
lines changed

14 files changed

+304
-13
lines changed

pystrings/lexicographically_largest_string/test_lexicographically_largest_string.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2,7 +2,7 @@
22
from parameterized import parameterized
33
from pystrings.lexicographically_largest_string import (
44
lexicographically_largest_string_from_box,
5-
lexicographically_largest_string_from_box_2
5+
lexicographically_largest_string_from_box_2,
66
)
77

88

pystrings/parenthesis/__init__.py

Whitespace-only changes.
File renamed without changes.
File renamed without changes.

pystrings/balanced_paren/test_balanced_paren.py renamed to pystrings/parenthesis/balanced_paren/test_balanced_paren.py

Lines changed: 10 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -1,10 +1,10 @@
11
import unittest
22

3-
from pystrings.balanced_paren import balanced_parens
3+
from pystrings.parenthesis.balanced_paren import balanced_parens
44

55

66
class BalancedParensTestCase(unittest.TestCase):
7-
def sample_tests(self):
7+
def test_sample(self):
88
for n, exp in [
99
[0, [""]],
1010
[1, ["()"]],
@@ -15,7 +15,7 @@ def sample_tests(self):
1515
actual.sort()
1616
self.assertEqual(actual, exp)
1717

18-
def random_tests(self):
18+
def test_random(self):
1919
def ref_sol(n):
2020
return list(dfs([], 0, 0, n))
2121

@@ -37,14 +37,14 @@ def dfs(s, open_count, close_count, max_p):
3737
rng = list(range(13))
3838
shuffle(rng)
3939
for n in rng:
40-
exp = ref_sol(n)
41-
act = balanced_parens(n)
42-
exp.sort()
43-
act.sort()
44-
if len(exp) > 1000:
45-
self.assertEqual(exp == act, "Nope...(n={})".format(n))
40+
expected = ref_sol(n)
41+
actual = balanced_parens(n)
42+
expected.sort()
43+
actual.sort()
44+
if len(expected) > 1000:
45+
self.assertEqual(expected, actual, "Nope...(n={})".format(n))
4646
else:
47-
self.assertEqual(act, exp)
47+
self.assertEqual(expected, actual)
4848

4949

5050
if __name__ == "__main__":

pystrings/paren_matcher/README.md renamed to pystrings/parenthesis/paren_matcher/README.md

Lines changed: 6 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,5 @@
1+
# Parenthesis Matcher
2+
13
I like parentheticals (a lot).
24
"Sometimes (when I nest them (my parentheticals) too much (like this (and this))) they get confusing."
35

@@ -20,8 +22,10 @@ keep a count of how many additional "(" we find as open_nested_parens. When we f
2022
open_nested_parens. If we find a ")" and open_nested_parens is 0, we know that ")" closes our initial "(", so we return
2123
its position.
2224

25+
```python
2326
def get_closing_paren(sentence, opening_paren_index):
24-
open_nested_parens = 0 position = opening_paren_index + 1
27+
open_nested_parens = 0
28+
position = opening_paren_index + 1
2529

2630
while position <= len(sentence) - 1:
2731
char = sentence[position]
@@ -37,6 +41,7 @@ open_nested_parens = 0 position = opening_paren_index + 1
3741
position += 1
3842

3943
raise Exception("No closing parenthesis :(")
44+
```
4045

4146
Complexity: O(n) time, where n is the number of chars in the string, and O(1) space.
4247

pystrings/paren_matcher/__init__.py renamed to pystrings/parenthesis/paren_matcher/__init__.py

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
def closing_paren(sentence, open_paren_index):
1+
def closing_paren(sentence: str, open_paren_index: int) -> int:
22
"""
33
44
:param: sentence the sentence to search through
Lines changed: 102 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,102 @@
1+
# Remove Invalid Parentheses
2+
3+
You are given a string, `s`, that contains:
4+
- Lowercase English letters
5+
- Opening '(' and closing ')' parentheses
6+
7+
A string is considered valid if:
8+
9+
1. All opening parentheses '(' are closed properly by a matching ')'.
10+
2. The parentheses are in the correct order and nesting.
11+
3. Letters can appear anywhere and do not affect validity.
12+
13+
Return all possible valid strings that can be formed by removing the minimum number of invalid parentheses. The answer
14+
must be a list of unique strings, in any order.
15+
16+
**Constraints**
17+
18+
- 1 ≤ `s.length` ≤ 25
19+
- `s` consists of lowercase English letters and parentheses `'('` and `')'`.
20+
- There will be at most `20` parentheses in `s`.
21+
22+
## Examples
23+
24+
![Example 1](./images/examples/remove_invalid_parenthesis_example_1.png)
25+
![Example 2](./images/examples/remove_invalid_parenthesis_example_2.png)
26+
![Example 3](./images/examples/remove_invalid_parenthesis_example_3.png)
27+
![Example 4](./images/examples/remove_invalid_parenthesis_example_4.png)
28+
29+
## Solution
30+
31+
The algorithm aims to generate all possible valid strings by removing the minimum number of invalid parentheses. It
32+
starts with a preprocessing step to determine how many opening and closing parentheses need to be removed. As it scans
33+
the string, it increments a counter for each opening parenthesis. For each closing parenthesis, it tries to match it
34+
with an opening one. If no match is found, it marks the closing parenthesis as unmatched. This ensures the algorithm
35+
knows the minimum number of each type of parenthesis to remove.
36+
37+
Once the number of invalid parentheses is known, the algorithm uses recursive backtracking to explore all valid
38+
combinations. It processes the string one character at a time and considers several choices:
39+
40+
- If the character is an opening parenthesis `(`, the algorithm considers two choices: skip it to reduce the number of
41+
unmatched openings, or add it to the expression and increase the open count.
42+
43+
- If it’s a closing parenthesis `)`, it can be skipped to reduce unmatched closings, or added to the expression—but only
44+
if more opening than closing parentheses have already been added, to keep the expression balanced.
45+
46+
- If it’s a non-parenthesis character, it is always added to the current expression.
47+
48+
The recursion continues until the end of the string is reached. At that point, if the number of opening and closing
49+
parentheses is equal (i.e., the expression is balanced) and no more removals are needed, the expression is added to a
50+
result set to ensure uniqueness. After exploring all possibilities, the algorithm returns all unique valid expressions
51+
as a list.
52+
53+
The steps of the algorithm are as follows:
54+
55+
1. Count the minimum invalid parentheses to remove:
56+
- Initialize two counters, `left_to_remove` (Number of extra '(' to remove) and `right_to_remove` (Number of extra
57+
`')'` to remove).
58+
- Iterate through the string:
59+
- If the character is `'('`, increment `left_to_remove`.
60+
- If the character is `')'`:
61+
- If `left_to_remove` > 0, a matching '(' exists, so decrement `left_to_remove`.
62+
- Else, increment `right_to_remove` (unmatched right parenthesis).
63+
64+
2. We define a recursive helper function, `backtrack(index, open_count, close_count, path, left_remain, right_remain)`
65+
to explore all valid combinations of the input string. `index` is the current position in the string. `open_count` and
66+
`close_count` track the number of '(' and ')' in the current path to maintain balance. `path` holds the built string
67+
so far. `left_remain` and `right_remain` indicate how many `'('` and `')'` can still be removed to form a valid expression.
68+
69+
- When the end of the string is reached (i.e., `index == len(s)`), check if no removals remain (`left_remain == 0` and
70+
`right_remain == 0`) and the parentheses are balanced (`open_count == close_count`). If so, add the current path to
71+
the result set.
72+
- Recursive case: At each character `char = s[index]`:
73+
- If `char == '('`:
74+
- Option to remove it (if `left_remain > 0`):
75+
- Recurse without adding '(', decrementing `left_remain`.
76+
- Option to keep it:
77+
- Recurse adding `'('` to path, incrementing `open_count`.
78+
- If `char == ')'`:
79+
- Option to remove it (if `right_remain > 0`):
80+
- Recurse without adding `')'`, decrementing `right_remain`.
81+
- Option to keep it:
82+
- Only if `close_count < open_count` (to ensure balance).
83+
- Recurse adding `')'`, incrementing `close_count`.
84+
85+
- If char is not a parenthesis:
86+
- Always keep it and recurse with the character added to `path`.
87+
88+
3. Call `backtrack(0, 0, 0, '', left_to_remove, right_to_remove)` to begin from index 0 and an empty path.
89+
4. Convert the result set `result` to a list and return it.
90+
91+
### Time Complexity
92+
93+
At each step of the algorithm, a parenthesis can either be removed or kept, which results in two choices per parenthesis.
94+
Therefore, for a string with `n` parentheses, the total number of combinations explored by the backtracking algorithm
95+
can grow exponentially, up to `2^n` paths (a branching factor of 2 at each parenthesis). Since letters are always included without affecting
96+
the decision tree, they do not contribute to the branching complexity. As a result, the overall time complexity of the
97+
algorithm in the worst case is `O(2^n)`.
98+
99+
### Space Complexity
100+
101+
The space complexity of the above solution is `O(n)`, where `n` is the length of the input string because the recursive
102+
call stack in the backtracking algorithm can reach a depth of `n`.
Lines changed: 137 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,137 @@
1+
from typing import List, Set, Deque
2+
3+
4+
def is_valid(input_str: str) -> bool:
    """
    Report whether the parentheses in input_str are balanced and correctly ordered.

    Any character other than '(' and ')' is ignored.

    :param input_str: text to validate
    :return: True when every ')' closes an earlier '(' and no '(' is left open
    """
    # depth is the number of '(' seen so far that have not been closed yet
    depth = 0
    for symbol in input_str:
        if symbol == ")":
            # a ')' with nothing open can never be matched later
            if depth == 0:
                return False
            depth -= 1
        elif symbol == "(":
            depth += 1
    # anything still open at the end is an unmatched '('
    return not depth
20+
21+
22+
def remove_invalid_parentheses(s: str) -> List[str]:
    """
    Return every valid string obtainable from s by deleting the fewest parentheses.

    Works breadth-first over the number of deletions: the k-th level holds every
    distinct string produced by deleting exactly k parentheses from s. The first
    level that contains at least one valid string is returned, so the deletion
    count is minimal. Letters are never deleted.

    :param s: string made of letters and '(' / ')' characters
    :return: the unique valid strings, in no particular order
    """
    # A set per level removes duplicates produced by deleting different
    # parentheses that lead to the same string.
    level: Set[str] = {s}

    while level:
        valid_strings = [candidate for candidate in level if is_valid(candidate)]
        # The first level with a valid string uses the fewest deletions.
        if valid_strings:
            return valid_strings

        # Next level: delete one more parenthesis from every current candidate.
        level = {
            candidate[:i] + candidate[i + 1 :]
            for candidate in level
            for i, char in enumerate(candidate)
            if char in "()"
        }

    # Defensive fallback, kept from the original implementation. It is not
    # expected to run: deleting every parenthesis always yields a valid string
    # before a level can become empty.
    return [""]
56+
57+
58+
def remove_invalid_parentheses_2(s: str) -> List[str]:
    """
    Return every valid string obtainable from s by deleting the fewest parentheses.

    First counts how many '(' and ')' are unmatched, then backtracks over the
    string, deleting exactly that many of each kind and keeping only balanced
    results. Letters are always kept.

    :param s: string made of letters and '(' / ')' characters
    :return: the unique valid strings, in no particular order
    """
    result: Set[str] = set()

    # Step 1: Determine how many left and right parentheses need to be removed.
    # left_to_remove doubles as the count of currently open '(' during the scan;
    # whatever is still open at the end is unmatched.
    left_to_remove = right_to_remove = 0
    for char in s:
        if char == "(":
            left_to_remove += 1
        elif char == ")":
            if left_to_remove > 0:
                left_to_remove -= 1
            else:
                right_to_remove += 1

    # Step 2: Backtracking function
    def backtrack(
        index: int,
        open_count: int,
        close_count: int,
        path: str,
        left_remain: int,
        right_remain: int,
    ) -> None:
        """
        Extend path with s[index:], trying both "delete" and "keep" per parenthesis.

        open_count / close_count are the parentheses kept in path so far;
        left_remain / right_remain are the deletions still to be spent.
        """
        if index == len(s):
            # Accept only when the whole deletion budget was used and path is balanced.
            if left_remain == 0 and right_remain == 0 and open_count == close_count:
                result.add(path)
            return

        char = s[index]

        if char == "(":
            # Option 1: remove the '('
            if left_remain > 0:
                backtrack(
                    index + 1,
                    open_count,
                    close_count,
                    path,
                    left_remain - 1,
                    right_remain,
                )
            # Option 2: keep the '('
            backtrack(
                index + 1,
                open_count + 1,
                close_count,
                path + char,
                left_remain,
                right_remain,
            )

        elif char == ")":
            # Option 1: remove the ')'
            if right_remain > 0:
                backtrack(
                    index + 1,
                    open_count,
                    close_count,
                    path,
                    left_remain,
                    right_remain - 1,
                )
            # Option 2: keep the ')' if it balances a previous '('
            if close_count < open_count:
                backtrack(
                    index + 1,
                    open_count,
                    close_count + 1,
                    path + char,
                    left_remain,
                    right_remain,
                )

        else:
            # Always include non-parenthesis characters
            backtrack(
                index + 1,
                open_count,
                close_count,
                path + char,
                left_remain,
                right_remain,
            )

    # Step 3: Start backtracking
    backtrack(0, 0, 0, "", left_to_remove, right_to_remove)
    return list(result)
65.3 KB
Loading

0 commit comments

Comments
 (0)