44
/// @brief Builds the dynamic-programming cost table for a weighted edit
///        distance between @p a and @p b that also allows swapping two
///        adjacent characters.
///
/// Entry `dp[i][j]` is the minimum cost found for turning the first `i`
/// characters of @p a into the first `j` characters of @p b using deletions,
/// insertions, replacements and adjacent swaps.
///
/// @param a               Source string.
/// @param b               Target string.
/// @param replace_weight  Cost of replacing one character by a different one.
/// @param insert_weight   Cost of inserting one character of @p b.
/// @param delete_weight   Cost of deleting one character of @p a.
/// @param swap_weight     Cost of swapping two adjacent characters.
/// @return A table with `a.length() + 1` rows and `b.length() + 1` columns;
///         the bottom-right entry is the distance between the full strings.
///
/// @note Equal characters are taken as a free match without evaluating the
///       other operations. NOTE(review): this shortcut presumes non-negative
///       weights -- confirm that callers never pass negative costs.
std::vector<std::vector<double>> compute_dp_table(
    const std::string& a,
    const std::string& b,
    double replace_weight,
    double insert_weight,
    double delete_weight,
    double swap_weight
) {
    // Lengths are kept as int to match the int indices used by the
    // backtracking code; the narrowing is made explicit here.
    const int len_a = static_cast<int>(a.length());
    const int len_b = static_cast<int>(b.length());
    std::vector<std::vector<double>> dp(len_a + 1, std::vector<double>(len_b + 1, 0.0));

    // First column: reaching the empty prefix of b means deleting i characters.
    for (int i = 0; i <= len_a; ++i) {
        dp[i][0] = i * delete_weight;
    }
    // First row: starting from the empty prefix of a means inserting j characters.
    for (int j = 0; j <= len_b; ++j) {
        dp[0][j] = j * insert_weight;
    }

    for (int i = 1; i <= len_a; ++i) {
        for (int j = 1; j <= len_b; ++j) {
            if (a[i - 1] == b[j - 1]) {
                // Match: carry the diagonal cost over unchanged and skip
                // the remaining operations for this cell.
                dp[i][j] = dp[i - 1][j - 1];
                continue;
            }

            const double deletion = dp[i - 1][j] + delete_weight;
            const double insertion = dp[i][j - 1] + insert_weight;
            const double substitution = dp[i - 1][j - 1] + replace_weight;
            dp[i][j] = std::min({deletion, insertion, substitution});

            // Adjacent transposition: the last two characters of both
            // prefixes are the same pair in opposite order.
            if (i > 1 && j > 1 &&
                a[i - 1] == b[j - 2] && a[i - 2] == b[j - 1]) {
                dp[i][j] = std::min(dp[i][j], dp[i - 2][j - 2] + swap_weight);
            }
        }
    }

    return dp;
}
4044
41-
4245double cpp_compute_distance (
4346 const std::string& a,
44- const std::string& b,
45- const std::map<CppEditopName, double >& cost_map
47+ const std::string& b,
48+ double replace_weight,
49+ double insert_weight,
50+ double delete_weight,
51+ double swap_weight
4652) {
47- auto dp = compute_dp_table (a, b, cost_map );
53+ auto dp = compute_dp_table (a, b, replace_weight, insert_weight, delete_weight, swap_weight );
4854 return dp[a.length ()][b.length ()];
4955}
5056
5157std::vector<std::vector<CppEditop>> backtrack_all_paths (
5258 const std::string& a,
5359 const std::string& b,
54- const std::map<CppEditopName, double >& cost_map,
5560 const std::vector<std::vector<double >>& dp,
5661 int i,
5762 int j,
58- std::vector<CppEditop>& current_path
63+ std::vector<CppEditop>& current_path,
64+ double replace_weight,
65+ double insert_weight,
66+ double delete_weight,
67+ double swap_weight
5968) {
6069 if (i == 0 && j == 0 ) {
6170 std::vector<CppEditop> reversed_path = current_path;
@@ -67,70 +76,71 @@ std::vector<std::vector<CppEditop>> backtrack_all_paths(
6776 double current_cost = dp[i][j];
6877 const double tol = 1e-6 ;
6978
70-
71- if (i > 0 && std::abs ((dp[i-1 ][j] + cost_map.at (DELETE)) - current_cost) < tol) {
72- CppEditop op (DELETE, i-1 , i-1 , cost_map.at (DELETE), std::string (1 , a[i-1 ]));
79+ if (i > 0 && std::abs ((dp[i-1 ][j] + delete_weight) - current_cost) < tol) {
80+ CppEditop op (DELETE, i-1 , i-1 , delete_weight, std::string (1 , a[i-1 ]));
7381 current_path.push_back (op);
74- auto paths = backtrack_all_paths (a, b, cost_map, dp, i-1 , j, current_path);
82+ auto paths = backtrack_all_paths (a, b, dp, i-1 , j, current_path, replace_weight, insert_weight, delete_weight, swap_weight );
7583 all_paths.insert (all_paths.end (), paths.begin (), paths.end ());
7684 current_path.pop_back ();
7785 }
7886
79- if (j > 0 && std::abs ((dp[i][j-1 ] + cost_map. at (INSERT) ) - current_cost) < tol) {
80- CppEditop op (INSERT, i, i, cost_map. at (INSERT) , std::string (1 , b[j-1 ]));
87+ if (j > 0 && std::abs ((dp[i][j-1 ] + insert_weight ) - current_cost) < tol) {
88+ CppEditop op (INSERT, i, i, insert_weight , std::string (1 , b[j-1 ]));
8189 current_path.push_back (op);
82- auto paths = backtrack_all_paths (a, b, cost_map, dp, i, j-1 , current_path);
90+ auto paths = backtrack_all_paths (a, b, dp, i, j-1 , current_path, replace_weight, insert_weight, delete_weight, swap_weight );
8391 all_paths.insert (all_paths.end (), paths.begin (), paths.end ());
8492 current_path.pop_back ();
8593 }
8694
87-
8895 if (i > 0 && j > 0 ) {
89- double sub_cost = (a[i-1 ] == b[j-1 ]) ? 0.0 : cost_map. at (REPLACE) ;
96+ double sub_cost = (a[i-1 ] == b[j-1 ]) ? 0.0 : replace_weight ;
9097 if (std::abs ((dp[i-1 ][j-1 ] + sub_cost) - current_cost) < tol) {
9198 std::string out_char = (sub_cost == 0.0 ) ? std::string (1 , a[i-1 ]) : std::string (1 , b[j-1 ]);
9299 CppEditop op (REPLACE, i-1 , j-1 , sub_cost, out_char);
93100 current_path.push_back (op);
94- auto paths = backtrack_all_paths (a, b, cost_map, dp, i-1 , j-1 , current_path);
101+ auto paths = backtrack_all_paths (a, b, dp, i-1 , j-1 , current_path, replace_weight, insert_weight, delete_weight, swap_weight );
95102 all_paths.insert (all_paths.end (), paths.begin (), paths.end ());
96103 current_path.pop_back ();
97104 }
98105 }
99106
100-
101107 if (i > 1 && j > 1 &&
102108 a[i-1 ] == b[j-2 ] && a[i-2 ] == b[j-1 ] &&
103- std::abs ((dp[i-2 ][j-2 ] + cost_map. at (SWAP) ) - current_cost) < tol) {
109+ std::abs ((dp[i-2 ][j-2 ] + swap_weight ) - current_cost) < tol) {
104110 std::string swap_str = std::string (1 , b[j-2 ]) + std::string (1 , b[j-1 ]);
105- CppEditop op (SWAP, i-2 , j-2 , cost_map. at (SWAP) , swap_str);
111+ CppEditop op (SWAP, i-2 , j-2 , swap_weight , swap_str);
106112 current_path.push_back (op);
107- auto paths = backtrack_all_paths (a, b, cost_map, dp, i-2 , j-2 , current_path);
113+ auto paths = backtrack_all_paths (a, b, dp, i-2 , j-2 , current_path, replace_weight, insert_weight, delete_weight, swap_weight );
108114 all_paths.insert (all_paths.end (), paths.begin (), paths.end ());
109115 current_path.pop_back ();
110116 }
111117
112118 return all_paths;
113119}
114120
115-
116121std::vector<std::vector<CppEditop>> cpp_compute_all_paths (
117122 const std::string& a,
118- const std::string& b,
119- const std::map<CppEditopName, double >& cost_map
123+ const std::string& b,
124+ double replace_weight,
125+ double insert_weight,
126+ double delete_weight,
127+ double swap_weight
120128) {
121- auto dp = compute_dp_table (a, b, cost_map );
129+ auto dp = compute_dp_table (a, b, replace_weight, insert_weight, delete_weight, swap_weight );
122130 std::vector<CppEditop> current_path;
123- return backtrack_all_paths (a, b, cost_map, dp, a.length (), b.length (), current_path);
131+ return backtrack_all_paths (a, b, dp, a.length (), b.length (), current_path, replace_weight, insert_weight, delete_weight, swap_weight );
124132}
125133
126-
127134void cpp_print_all_paths (
128135 const std::string& a,
129- const std::string& b,
130- const std::map<CppEditopName, double >& cost_map
136+ const std::string& b,
137+ double replace_weight,
138+ double insert_weight,
139+ double delete_weight,
140+ double swap_weight
131141) {
132- auto paths = cpp_compute_all_paths (a, b, cost_map );
133- double distance = cpp_compute_distance (a, b, cost_map );
142+ auto paths = cpp_compute_all_paths (a, b, replace_weight, insert_weight, delete_weight, swap_weight );
143+ double distance = cpp_compute_distance (a, b, replace_weight, insert_weight, delete_weight, swap_weight );
134144
135145 std::cout << " OSA Distance from '" << a << " ' to '" << b << " ': " << distance << std::endl;
136146 std::cout << " Number of optimal edit sequences: " << paths.size () << std::endl;
0 commit comments