|
1 | | -""" |
2 | | -Algorithm for calculating the most cost-efficient sequence for converting one string |
3 | | -into another. |
4 | | -The only allowed operations are |
5 | | ---- Cost to copy a character is copy_cost |
6 | | ---- Cost to replace a character is replace_cost |
7 | | ---- Cost to delete a character is delete_cost |
8 | | ---- Cost to insert a character is insert_cost |
9 | | -""" |
10 | | - |
11 | | - |
def compute_transform_tables(
    source_string: str,
    destination_string: str,
    copy_cost: int,
    replace_cost: int,
    delete_cost: int,
    insert_cost: int,
) -> tuple[list[list[int]], list[list[str]]]:
    """
    Build the cost table and the operation table for turning
    ``source_string`` into ``destination_string``.

    ``costs[i][j]`` holds the cheapest cost found for converting the first
    ``i`` source characters into the first ``j`` destination characters.
    ``ops[i][j]`` holds the last operation on that path, encoded as
    ``"C<ch>"`` (copy), ``"R<src><dst>"`` (replace), ``"D<ch>"`` (delete),
    ``"I<ch>"`` (insert), or ``"0"`` for the origin cell.

    >>> costs, operations = compute_transform_tables("cat", "cut", 1, 2, 3, 3)
    >>> costs[0][:4]
    [0, 3, 6, 9]
    >>> costs[2][:4]
    [6, 4, 3, 6]
    >>> operations[0][:4]
    ['0', 'Ic', 'Iu', 'It']
    >>> operations[3][:4]
    ['Dt', 'Dt', 'Rtu', 'Ct']

    >>> compute_transform_tables("", "", 1, 2, 3, 3)
    ([[0]], [['0']])
    """
    src_chars = list(source_string)
    dst_chars = list(destination_string)
    row_count = len(src_chars) + 1
    col_count = len(dst_chars) + 1

    costs = [[0] * col_count for _ in range(row_count)]
    ops = [["0"] * col_count for _ in range(row_count)]

    # First column: the only way to reach an empty destination is deleting.
    for row, src_char in enumerate(src_chars, start=1):
        costs[row][0] = row * delete_cost
        ops[row][0] = f"D{src_char}"

    # First row: the only way to leave an empty source is inserting.
    for col, dst_char in enumerate(dst_chars, start=1):
        costs[0][col] = col * insert_cost
        ops[0][col] = f"I{dst_char}"

    for row, src_char in enumerate(src_chars, start=1):
        for col, dst_char in enumerate(dst_chars, start=1):
            diagonal = costs[row - 1][col - 1]
            if src_char == dst_char:
                best_cost = diagonal + copy_cost
                best_op = f"C{src_char}"
            else:
                best_cost = diagonal + replace_cost
                best_op = f"R{src_char}{dst_char}"

            # Delete and insert only win when strictly cheaper, so ties
            # resolve in the order: copy/replace, then delete, then insert.
            via_delete = costs[row - 1][col] + delete_cost
            if via_delete < best_cost:
                best_cost = via_delete
                best_op = f"D{src_char}"

            via_insert = costs[row][col - 1] + insert_cost
            if via_insert < best_cost:
                best_cost = via_insert
                best_op = f"I{dst_char}"

            costs[row][col] = best_cost
            ops[row][col] = best_op

    return costs, ops
74 | | - |
75 | | - |
def assemble_transformation(ops: list[list[str]], i: int, j: int) -> list[str]:
    """
    Assembles the transformations based on the ops table.

    Starting at cell ``(i, j)``, follows the recorded operations back to the
    origin cell ``(0, 0)`` and returns them in forward (application) order.
    An operation starting with ``"C"`` or ``"R"`` steps diagonally, one
    starting with ``"D"`` steps up a row, and anything else is treated as an
    insert and steps left a column.

    The walk is iterative, so the length of the path is not limited by the
    interpreter's recursion limit.

    >>> ops = [['0', 'Ic', 'Iu', 'It'],
    ...        ['Dc', 'Cc', 'Iu', 'It'],
    ...        ['Da', 'Da', 'Rau', 'Rat'],
    ...        ['Dt', 'Dt', 'Rtu', 'Ct']]
    >>> x = len(ops) - 1
    >>> y = len(ops[0]) - 1
    >>> assemble_transformation(ops, x, y)
    ['Cc', 'Rau', 'Ct']

    >>> ops1 = [['0']]
    >>> x1 = len(ops1) - 1
    >>> y1 = len(ops1[0]) - 1
    >>> assemble_transformation(ops1, x1, y1)
    []

    >>> ops2 = [['0', 'I1', 'I2', 'I3'],
    ...         ['D1', 'C1', 'I2', 'I3'],
    ...         ['D2', 'D2', 'R23', 'R23']]
    >>> x2 = len(ops2) - 1
    >>> y2 = len(ops2[0]) - 1
    >>> assemble_transformation(ops2, x2, y2)
    ['C1', 'I2', 'R23']
    """
    backwards: list[str] = []

    while i != 0 or j != 0:
        op = ops[i][j]
        kind = op[0]
        backwards.append(op)

        if kind in {"C", "R"}:
            # Copy/replace consume one character from each string.
            i -= 1
            j -= 1
        elif kind == "D":
            # Delete consumes a source character only.
            i -= 1
        else:
            # Insert consumes a destination character only.
            j -= 1

    # Operations were collected from the end of the path to its start.
    backwards.reverse()
    return backwards
117 | | - |
118 | | - |
119 | | -if __name__ == "__main__": |
120 | | - _, operations = compute_transform_tables("Python", "Algorithms", -1, 1, 2, 2) |
121 | | - |
122 | | - m = len(operations) |
123 | | - n = len(operations[0]) |
124 | | - sequence = assemble_transformation(operations, m - 1, n - 1) |
125 | | - |
126 | | - string = list("Python") |
127 | | - i = 0 |
128 | | - cost = 0 |
129 | | - |
130 | | - with open("min_cost.txt", "w") as file: |
131 | | - for op in sequence: |
132 | | - print("".join(string)) |
133 | | - |
134 | | - if op[0] == "C": |
135 | | - file.write("%-16s" % "Copy %c" % op[1]) # noqa: UP031 |
136 | | - file.write("\t\t\t" + "".join(string)) |
137 | | - file.write("\r\n") |
138 | | - |
139 | | - cost -= 1 |
140 | | - elif op[0] == "R": |
141 | | - string[i] = op[2] |
142 | | - |
143 | | - file.write("%-16s" % ("Replace %c" % op[1] + " with " + str(op[2]))) # noqa: UP031 |
144 | | - file.write("\t\t" + "".join(string)) |
145 | | - file.write("\r\n") |
146 | | - |
147 | | - cost += 1 |
148 | | - elif op[0] == "D": |
149 | | - string.pop(i) |
150 | | - |
151 | | - file.write("%-16s" % "Delete %c" % op[1]) # noqa: UP031 |
152 | | - file.write("\t\t\t" + "".join(string)) |
153 | | - file.write("\r\n") |
154 | | - |
155 | | - cost += 2 |
def compute_transform_tables(s1, s2, insert_cost, delete_cost, replace_cost, swap_cost, ignore_case=False):
    """
    Build the cost table and the operation table for turning ``s1`` into ``s2``.

    ``dp[i][j]`` is the cost computed for converting the first ``i``
    characters of ``s1`` into the first ``j`` characters of ``s2``.
    ``op[i][j]`` is the last operation chosen for that cell: ``"C<ch>"``
    (matching characters, no cost added), ``"R<dst>"`` (replace with the
    destination character), ``"I<dst>"`` (insert), ``"D<src>"`` (delete), or
    ``"0"`` for the origin cell.

    When ``ignore_case`` is true both strings are lower-cased first, so the
    characters recorded in ``op`` are the lower-cased ones.

    NOTE(review): ``swap_cost`` is accepted but never read; no swap or
    transposition operation is considered. Confirm whether that is intended.

    Returns the pair ``(dp, op)``.
    """
    if ignore_case:
        s1 = s1.lower()
        s2 = s2.lower()

    row_count = len(s1) + 1
    col_count = len(s2) + 1

    # Cost table and operation table, both (len(s1) + 1) x (len(s2) + 1).
    dp = [[0 for _ in range(col_count)] for _ in range(row_count)]
    op = [["0" for _ in range(col_count)] for _ in range(row_count)]

    # Base cases: delete everything (first column) / insert everything (first row).
    for i, src in enumerate(s1, start=1):
        dp[i][0] = i * delete_cost
        op[i][0] = "D" + src
    for j, dst in enumerate(s2, start=1):
        dp[0][j] = j * insert_cost
        op[0][j] = "I" + dst

    for i, src in enumerate(s1, start=1):
        for j, dst in enumerate(s2, start=1):
            if src == dst:
                # Matching characters carry the diagonal cost over unchanged.
                dp[i][j] = dp[i - 1][j - 1]
                op[i][j] = "C" + src
                continue

            via_replace = dp[i - 1][j - 1] + replace_cost
            via_insert = dp[i][j - 1] + insert_cost
            via_delete = dp[i - 1][j] + delete_cost

            # On equal costs, replace is preferred, then insert, then delete.
            cheapest = min(via_replace, via_insert, via_delete)
            dp[i][j] = cheapest
            if cheapest == via_replace:
                op[i][j] = "R" + dst
            elif cheapest == via_insert:
                op[i][j] = "I" + dst
            else:
                op[i][j] = "D" + src

    return dp, op
166 | 43 |
|
167 | | - print("".join(string)) |
168 | | - print("Cost: ", cost) |
169 | 44 |
|
170 | | - file.write("\r\nMinimum cost: " + str(cost)) |
0 commit comments