Skip to content

Improve min_cost_string_conversion with validation, case-insensitive option, and more tests #12886

New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Open
wants to merge 2 commits into
base: master
Choose a base branch
from
Open
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
210 changes: 42 additions & 168 deletions strings/min_cost_string_conversion.py
Original file line number Diff line number Diff line change
@@ -1,170 +1,44 @@
"""
Algorithm for calculating the most cost-efficient sequence of operations for
converting one string into another.
The only allowed operations, each with its own configurable cost, are
--- Copy a character, at a cost of copy_cost
--- Replace a character, at a cost of replace_cost
--- Delete a character, at a cost of delete_cost
--- Insert a character, at a cost of insert_cost
"""


def compute_transform_tables(
    source_string: str,
    destination_string: str,
    copy_cost: int,
    replace_cost: int,
    delete_cost: int,
    insert_cost: int,
) -> tuple[list[list[int]], list[list[str]]]:
    """
    Build the dynamic-programming tables for the cheapest conversion of
    ``source_string`` into ``destination_string``.

    Two tables of size ``(len(source) + 1) x (len(destination) + 1)`` are
    returned. ``costs[i][j]`` is the cheapest cost of turning the first ``i``
    source characters into the first ``j`` destination characters, and
    ``ops[i][j]`` is the last operation on that cheapest path, encoded as
    ``"C<char>"`` (copy), ``"R<src><dst>"`` (replace), ``"D<char>"`` (delete),
    ``"I<char>"`` (insert) or ``"0"`` for the empty-to-empty origin cell.

    Copy/replace is considered first; delete and then insert only take over
    when they are strictly cheaper.

    >>> costs, operations = compute_transform_tables("cat", "cut", 1, 2, 3, 3)
    >>> costs[0][:4]
    [0, 3, 6, 9]
    >>> costs[2][:4]
    [6, 4, 3, 6]
    >>> operations[0][:4]
    ['0', 'Ic', 'Iu', 'It']
    >>> operations[3][:4]
    ['Dt', 'Dt', 'Rtu', 'Ct']

    >>> compute_transform_tables("", "", 1, 2, 3, 3)
    ([[0]], [['0']])
    """
    src_chars = list(source_string)
    dst_chars = list(destination_string)
    row_count = len(src_chars) + 1
    col_count = len(dst_chars) + 1

    costs = [[0] * col_count for _ in range(row_count)]
    ops = [["0"] * col_count for _ in range(row_count)]

    # First column: reaching an empty destination means deleting every
    # source character seen so far.
    for row, src_char in enumerate(src_chars, start=1):
        costs[row][0] = row * delete_cost
        ops[row][0] = f"D{src_char}"

    # First row: starting from an empty source means inserting every
    # destination character seen so far.
    for col, dst_char in enumerate(dst_chars, start=1):
        costs[0][col] = col * insert_cost
        ops[0][col] = f"I{dst_char}"

    for row, src_char in enumerate(src_chars, start=1):
        for col, dst_char in enumerate(dst_chars, start=1):
            diagonal = costs[row - 1][col - 1]
            if src_char == dst_char:
                best_cost = diagonal + copy_cost
                best_op = f"C{src_char}"
            else:
                best_cost = diagonal + replace_cost
                best_op = f"R{src_char}{dst_char}"

            # Strict comparisons: on a tie the earlier candidate is kept.
            via_delete = costs[row - 1][col] + delete_cost
            if via_delete < best_cost:
                best_cost = via_delete
                best_op = f"D{src_char}"

            via_insert = costs[row][col - 1] + insert_cost
            if via_insert < best_cost:
                best_cost = via_insert
                best_op = f"I{dst_char}"

            costs[row][col] = best_cost
            ops[row][col] = best_op

    return costs, ops


def assemble_transformation(ops: list[list[str]], i: int, j: int) -> list[str]:
    """
    Assembles the transformations based on the ops table.

    Starting at cell ``(i, j)`` the table is walked back to ``(0, 0)``: a copy
    or replace (``"C..."``/``"R..."``) steps diagonally, a delete (``"D..."``)
    steps up one row and anything else is treated as an insert and steps left
    one column. The visited operations are returned in forward order, i.e.
    the order in which they have to be applied to the source string.

    The walk is iterative, so its length is not limited by the interpreter's
    recursion depth.

    Raises:
        IndexError: if the table is malformed and the walk leaves the table
            through its top or left edge.

    >>> ops = [['0', 'Ic', 'Iu', 'It'],
    ...        ['Dc', 'Cc', 'Iu', 'It'],
    ...        ['Da', 'Da', 'Rau', 'Rat'],
    ...        ['Dt', 'Dt', 'Rtu', 'Ct']]
    >>> x = len(ops) - 1
    >>> y = len(ops[0]) - 1
    >>> assemble_transformation(ops, x, y)
    ['Cc', 'Rau', 'Ct']

    >>> ops1 = [['0']]
    >>> x1 = len(ops1) - 1
    >>> y1 = len(ops1[0]) - 1
    >>> assemble_transformation(ops1, x1, y1)
    []

    >>> ops2 = [['0', 'I1', 'I2', 'I3'],
    ...         ['D1', 'C1', 'I2', 'I3'],
    ...         ['D2', 'D2', 'R23', 'R23']]
    >>> x2 = len(ops2) - 1
    >>> y2 = len(ops2[0]) - 1
    >>> assemble_transformation(ops2, x2, y2)
    ['C1', 'I2', 'R23']

    >>> len(assemble_transformation([['0'] + ['Ia'] * 5000], 0, 5000))
    5000
    """
    backwards: list[str] = []
    row, col = i, j

    while row != 0 or col != 0:
        # Negative indices would silently wrap around in Python; a walk that
        # gets here can never reach (0, 0), so fail loudly instead.
        if row < 0 or col < 0:
            raise IndexError("ops table does not lead back to cell (0, 0)")

        operation = ops[row][col]
        backwards.append(operation)

        kind = operation[0]
        if kind in {"C", "R"}:
            row -= 1
            col -= 1
        elif kind == "D":
            row -= 1
        else:
            col -= 1

    # Operations were collected from the end of the path to its start.
    backwards.reverse()
    return backwards


# Demo script: converts "Python" into "Algorithms" and logs every step to
# min_cost.txt while echoing the intermediate strings to stdout.
if __name__ == "__main__":
# Costs passed positionally: copy=-1, replace=1, delete=2, insert=2.
_, operations = compute_transform_tables("Python", "Algorithms", -1, 1, 2, 2)

# Backtrack from the bottom-right cell of the operations table.
m = len(operations)
n = len(operations[0])
sequence = assemble_transformation(operations, m - 1, n - 1)

# Working copy of the source string, edited in place as operations are applied.
string = list("Python")
# Index into the working copy used by the replace/delete edits below.
i = 0
# Running total, adjusted per operation with the same values passed above.
cost = 0

with open("min_cost.txt", "w") as file:
for op in sequence:
print("".join(string))

# Copy: the working string is left unchanged.
if op[0] == "C":
# The two % operators apply left to right: pad the label first, then fill in %c.
file.write("%-16s" % "Copy %c" % op[1]) # noqa: UP031
file.write("\t\t\t" + "".join(string))
file.write("\r\n")

cost -= 1
# Replace: op is "R<old><new>", so op[2] is the replacement character.
elif op[0] == "R":
string[i] = op[2]

file.write("%-16s" % ("Replace %c" % op[1] + " with " + str(op[2]))) # noqa: UP031
file.write("\t\t" + "".join(string))
file.write("\r\n")

cost += 1
# Delete: drop the character at the current position.
elif op[0] == "D":
string.pop(i)

file.write("%-16s" % "Delete %c" % op[1]) # noqa: UP031
file.write("\t\t\t" + "".join(string))
file.write("\r\n")

cost += 2
s1, s2, insert_cost, delete_cost, replace_cost, swap_cost, ignore_case=False

Check failure on line 2 in strings/min_cost_string_conversion.py

View workflow job for this annotation

GitHub Actions / ruff

Ruff (ARG001)

strings/min_cost_string_conversion.py:2:53: ARG001 Unused function argument: `swap_cost`
):
if ignore_case:
s1, s2 = s1.lower(), s2.lower()

m, n = len(s1), len(s2)

# cost table
dp = [[0] * (n + 1) for _ in range(m + 1)]
# operation table
op = [["0"] * (n + 1) for _ in range(m + 1)]

# Initialize base cases
for i in range(1, m + 1):
dp[i][0] = i * delete_cost
op[i][0] = "D" + s1[i - 1]
for j in range(1, n + 1):
dp[0][j] = j * insert_cost
op[0][j] = "I" + s2[j - 1]

# Fill DP tables
for i in range(1, m + 1):
for j in range(1, n + 1):
if s1[i - 1] == s2[j - 1]:
dp[i][j] = dp[i - 1][j - 1]
op[i][j] = "C" + s1[i - 1]
else:
string.insert(i, op[1])

file.write("%-16s" % "Insert %c" % op[1]) # noqa: UP031
file.write("\t\t\t" + "".join(string))
file.write("\r\n")

cost += 2

i += 1

print("".join(string))
print("Cost: ", cost)

file.write("\r\nMinimum cost: " + str(cost))
replace = dp[i - 1][j - 1] + replace_cost
insert = dp[i][j - 1] + insert_cost
delete = dp[i - 1][j] + delete_cost

# Choose min cost, tie-breaking in order: replace > insert > delete
min_cost = min(replace, insert, delete)
dp[i][j] = min_cost

if min_cost == replace:
op[i][j] = "R" + s2[j - 1]
elif min_cost == insert:
op[i][j] = "I" + s2[j - 1]
else:
op[i][j] = "D" + s1[i - 1]

return dp, op
Loading