"""
Algorithm for calculating the most cost-efficient sequence of operations for
converting one string into another.

The only allowed operations are:
- copy a character, at a cost of copy_cost
- replace a character, at a cost of replace_cost
- delete a character, at a cost of delete_cost
- insert a character, at a cost of insert_cost
"""
10 |
| - |
11 |
| - |
12 |
def compute_transform_tables(
    source_string: str,
    destination_string: str,
    copy_cost: int,
    replace_cost: int,
    delete_cost: int,
    insert_cost: int,
) -> tuple[list[list[int]], list[list[str]]]:
    """
    Build the cost and operation tables for converting one string into another.

    ``costs[i][j]`` holds the cost chosen for turning the first ``i`` characters
    of ``source_string`` into the first ``j`` characters of
    ``destination_string``; ``ops[i][j]`` holds the operation chosen for that
    cell:

    * ``"C<x>"``    - copy character ``x``
    * ``"R<x><y>"`` - replace source character ``x`` with destination ``y``
    * ``"D<x>"``    - delete source character ``x``
    * ``"I<y>"``    - insert destination character ``y``
    * ``"0"``       - the empty-to-empty origin cell

    Ties are resolved in favour of copy/replace first, then delete, then
    insert, because delete and insert only win on a strictly lower cost.

    :param source_string: string to transform
    :param destination_string: string to obtain
    :param copy_cost: cost of keeping a matching character
    :param replace_cost: cost of substituting a differing character
    :param delete_cost: cost of removing a source character
    :param insert_cost: cost of adding a destination character
    :return: the ``(costs, ops)`` tables, each of size
        ``(len(source_string) + 1) x (len(destination_string) + 1)``

    >>> costs, operations = compute_transform_tables("cat", "cut", 1, 2, 3, 3)
    >>> costs[0][:4]
    [0, 3, 6, 9]
    >>> costs[2][:4]
    [6, 4, 3, 6]
    >>> operations[0][:4]
    ['0', 'Ic', 'Iu', 'It']
    >>> operations[3][:4]
    ['Dt', 'Dt', 'Rtu', 'Ct']

    >>> compute_transform_tables("", "", 1, 2, 3, 3)
    ([[0]], [['0']])
    """
    len_source = len(source_string)
    len_destination = len(destination_string)

    costs = [[0] * (len_destination + 1) for _ in range(len_source + 1)]
    ops = [["0"] * (len_destination + 1) for _ in range(len_source + 1)]

    # First column: reaching an empty destination means deleting every
    # source character seen so far.
    for i in range(1, len_source + 1):
        costs[i][0] = i * delete_cost
        ops[i][0] = f"D{source_string[i - 1]}"

    # First row: starting from an empty source means inserting every
    # destination character seen so far.
    for j in range(1, len_destination + 1):
        costs[0][j] = j * insert_cost
        ops[0][j] = f"I{destination_string[j - 1]}"

    for i in range(1, len_source + 1):
        source_char = source_string[i - 1]
        for j in range(1, len_destination + 1):
            destination_char = destination_string[j - 1]

            # Diagonal move: copy when the characters match, else replace.
            if source_char == destination_char:
                costs[i][j] = costs[i - 1][j - 1] + copy_cost
                ops[i][j] = f"C{source_char}"
            else:
                costs[i][j] = costs[i - 1][j - 1] + replace_cost
                ops[i][j] = f"R{source_char}{destination_char}"

            # Strict comparisons keep the diagonal move on ties.
            if costs[i - 1][j] + delete_cost < costs[i][j]:
                costs[i][j] = costs[i - 1][j] + delete_cost
                ops[i][j] = f"D{source_char}"

            if costs[i][j - 1] + insert_cost < costs[i][j]:
                costs[i][j] = costs[i][j - 1] + insert_cost
                ops[i][j] = f"I{destination_char}"

    return costs, ops
74 |
| - |
75 |
| - |
76 |
def assemble_transformation(ops: list[list[str]], i: int, j: int) -> list[str]:
    """
    Assemble the sequence of operations that leads to cell ``(i, j)`` of ``ops``.

    Starting at ``ops[i][j]``, the table is walked back to the origin cell
    ``(0, 0)``: copy (``C``) and replace (``R``) move diagonally, delete
    (``D``) moves up one row, and anything else is treated as an insert and
    moves left one column. The walk is iterative, so long paths do not hit
    Python's recursion limit.

    :param ops: operations table whose entries start with the operation letter
    :param i: row index to start from
    :param j: column index to start from
    :return: the operations in the order they are applied (origin first)

    >>> ops = [['0', 'Ic', 'Iu', 'It'],
    ...        ['Dc', 'Cc', 'Iu', 'It'],
    ...        ['Da', 'Da', 'Rau', 'Rat'],
    ...        ['Dt', 'Dt', 'Rtu', 'Ct']]
    >>> x = len(ops) - 1
    >>> y = len(ops[0]) - 1
    >>> assemble_transformation(ops, x, y)
    ['Cc', 'Rau', 'Ct']

    >>> ops1 = [['0']]
    >>> x1 = len(ops1) - 1
    >>> y1 = len(ops1[0]) - 1
    >>> assemble_transformation(ops1, x1, y1)
    []

    >>> ops2 = [['0', 'I1', 'I2', 'I3'],
    ...         ['D1', 'C1', 'I2', 'I3'],
    ...         ['D2', 'D2', 'R23', 'R23']]
    >>> x2 = len(ops2) - 1
    >>> y2 = len(ops2[0]) - 1
    >>> assemble_transformation(ops2, x2, y2)
    ['C1', 'I2', 'R23']
    """
    steps: list[str] = []

    while i != 0 or j != 0:
        op = ops[i][j]
        steps.append(op)

        kind = op[0]
        if kind in {"C", "R"}:
            i -= 1
            j -= 1
        elif kind == "D":
            i -= 1
        else:
            j -= 1

    # Steps were collected from the end of the path back to the origin.
    steps.reverse()
    return steps
117 |
| - |
118 |
| - |
119 |
if __name__ == "__main__":
    # Demo: transform "Python" into "Algorithms", printing every intermediate
    # string and logging each operation to min_cost.txt.
    source, destination = "Python", "Algorithms"

    # Named once so the table computation and the running total below cannot
    # drift apart.
    copy_cost, replace_cost, delete_cost, insert_cost = -1, 1, 2, 2

    _, operations = compute_transform_tables(
        source, destination, copy_cost, replace_cost, delete_cost, insert_cost
    )
    sequence = assemble_transformation(
        operations, len(operations) - 1, len(operations[0]) - 1
    )

    string = list(source)
    position = 0  # index in `string` that the next operation acts on
    cost = 0

    with open("min_cost.txt", "w", encoding="utf-8") as file:
        for op in sequence:
            print("".join(string))

            kind = op[0]
            if kind == "C":
                label, tabs = f"Copy {op[1]}", "\t\t\t"
                cost += copy_cost
                position += 1
            elif kind == "R":
                string[position] = op[2]
                label, tabs = f"Replace {op[1]} with {op[2]}", "\t\t"
                cost += replace_cost
                position += 1
            elif kind == "D":
                # Popping shifts the next character into `position`, so the
                # cursor must stay where it is.
                string.pop(position)
                label, tabs = f"Delete {op[1]}", "\t\t\t"
                cost += delete_cost
            else:
                string.insert(position, op[1])
                label, tabs = f"Insert {op[1]}", "\t\t\t"
                cost += insert_cost
                position += 1

            # Pad the finished label (not the template) to 16 columns.
            file.write(f"{label:<16}")
            file.write(tabs + "".join(string))
            file.write("\r\n")

        print("".join(string))
        print("Cost: ", cost)

        file.write("\r\nMinimum cost: " + str(cost))
def compute_transform_tables(
    s1: str,
    s2: str,
    insert_cost: float,
    delete_cost: float,
    replace_cost: float,
    swap_cost: float,
    ignore_case: bool = False,
) -> tuple[list[list[float]], list[list[str]]]:
    """
    Build the cost and operation tables for converting ``s1`` into ``s2``.

    ``dp[i][j]`` is the cost recorded for turning the first ``i`` characters of
    ``s1`` into the first ``j`` characters of ``s2``; ``op[i][j]`` is the
    operation chosen for that cell:

    * ``"C<x>"`` - characters match; ``x`` is kept at no extra cost
    * ``"R<y>"`` - replace the source character with destination ``y``
    * ``"I<y>"`` - insert destination character ``y``
    * ``"D<x>"`` - delete source character ``x``
    * ``"0"``    - the empty-to-empty origin cell

    Matching characters always take the copy move. Otherwise the cheapest of
    replace, insert and delete is chosen, with ties resolved in that order.

    :param s1: string to transform
    :param s2: string to obtain
    :param insert_cost: cost of inserting one character
    :param delete_cost: cost of deleting one character
    :param replace_cost: cost of replacing one character
    :param swap_cost: accepted for interface compatibility; this
        implementation never reads it
    :param ignore_case: when true, both strings are lower-cased before
        comparison, so the characters recorded in ``op`` are lower-case too
    :return: the ``(dp, op)`` tables, each ``(len(s1) + 1) x (len(s2) + 1)``

    >>> dp, op = compute_transform_tables("cat", "cut", 1, 1, 1, 1)
    >>> dp[3]
    [3, 2, 2, 1]
    >>> op[3]
    ['Dt', 'Dt', 'Ru', 'Ct']
    >>> compute_transform_tables("", "", 1, 1, 1, 1)
    ([[0]], [['0']])
    >>> compute_transform_tables("A", "a", 1, 1, 1, 1, ignore_case=True)
    ([[0, 1], [1, 0]], [['0', 'Ia'], ['Da', 'Ca']])
    """
    if ignore_case:
        s1, s2 = s1.lower(), s2.lower()

    m, n = len(s1), len(s2)

    dp = [[0] * (n + 1) for _ in range(m + 1)]
    op = [["0"] * (n + 1) for _ in range(m + 1)]

    # Base cases: delete everything (first column) or insert everything
    # (first row).
    for i, src in enumerate(s1, start=1):
        dp[i][0] = i * delete_cost
        op[i][0] = "D" + src
    for j, dst in enumerate(s2, start=1):
        dp[0][j] = j * insert_cost
        op[0][j] = "I" + dst

    for i, src in enumerate(s1, start=1):
        for j, dst in enumerate(s2, start=1):
            if src == dst:
                # Matching characters carry the diagonal cost over unchanged.
                dp[i][j] = dp[i - 1][j - 1]
                op[i][j] = "C" + src
                continue

            replace = dp[i - 1][j - 1] + replace_cost
            insert = dp[i][j - 1] + insert_cost
            delete = dp[i - 1][j] + delete_cost

            min_cost = min(replace, insert, delete)
            dp[i][j] = min_cost

            # Tie-break order: replace, then insert, then delete.
            if min_cost == replace:
                op[i][j] = "R" + dst
            elif min_cost == insert:
                op[i][j] = "I" + dst
            else:
                op[i][j] = "D" + src

    return dp, op