|
| 1 | +#include "cache.h" |
| 2 | +#include "tree.h" |
| 3 | +#include "tree-walk.h" |
| 4 | + |
| 5 | +static int score_missing(unsigned mode, const char *path) |
| 6 | +{ |
| 7 | + int score; |
| 8 | + |
| 9 | + if (S_ISDIR(mode)) |
| 10 | + score = -1000; |
| 11 | + else if (S_ISLNK(mode)) |
| 12 | + score = -500; |
| 13 | + else |
| 14 | + score = -50; |
| 15 | + return score; |
| 16 | +} |
| 17 | + |
| 18 | +static int score_differs(unsigned mode1, unsigned mode2, const char *path) |
| 19 | +{ |
| 20 | + int score; |
| 21 | + |
| 22 | + if (S_ISDIR(mode1) != S_ISDIR(mode2)) |
| 23 | + score = -100; |
| 24 | + else if (S_ISLNK(mode1) != S_ISLNK(mode2)) |
| 25 | + score = -50; |
| 26 | + else |
| 27 | + score = -5; |
| 28 | + return score; |
| 29 | +} |
| 30 | + |
| 31 | +static int score_matches(unsigned mode1, unsigned mode2, const char *path) |
| 32 | +{ |
| 33 | + int score; |
| 34 | + |
| 35 | + /* Heh, we found SHA-1 collisions between different kind of objects */ |
| 36 | + if (S_ISDIR(mode1) != S_ISDIR(mode2)) |
| 37 | + score = -100; |
| 38 | + else if (S_ISLNK(mode1) != S_ISLNK(mode2)) |
| 39 | + score = -50; |
| 40 | + |
| 41 | + else if (S_ISDIR(mode1)) |
| 42 | + score = 1000; |
| 43 | + else if (S_ISLNK(mode1)) |
| 44 | + score = 500; |
| 45 | + else |
| 46 | + score = 250; |
| 47 | + return score; |
| 48 | +} |
| 49 | + |
| 50 | +/* |
| 51 | + * Inspect two trees, and give a score that tells how similar they are. |
| 52 | + */ |
| 53 | +static int score_trees(const unsigned char *hash1, const unsigned char *hash2) |
| 54 | +{ |
| 55 | + struct tree_desc one; |
| 56 | + struct tree_desc two; |
| 57 | + void *one_buf, *two_buf; |
| 58 | + int score = 0; |
| 59 | + enum object_type type; |
| 60 | + unsigned long size; |
| 61 | + |
| 62 | + one_buf = read_sha1_file(hash1, &type, &size); |
| 63 | + if (!one_buf) |
| 64 | + die("unable to read tree (%s)", sha1_to_hex(hash1)); |
| 65 | + if (type != OBJ_TREE) |
| 66 | + die("%s is not a tree", sha1_to_hex(hash1)); |
| 67 | + init_tree_desc(&one, one_buf, size); |
| 68 | + two_buf = read_sha1_file(hash2, &type, &size); |
| 69 | + if (!two_buf) |
| 70 | + die("unable to read tree (%s)", sha1_to_hex(hash2)); |
| 71 | + if (type != OBJ_TREE) |
| 72 | + die("%s is not a tree", sha1_to_hex(hash2)); |
| 73 | + init_tree_desc(&two, two_buf, size); |
| 74 | + while (one.size | two.size) { |
| 75 | + const unsigned char *elem1 = elem1; |
| 76 | + const unsigned char *elem2 = elem2; |
| 77 | + const char *path1 = path1; |
| 78 | + const char *path2 = path2; |
| 79 | + unsigned mode1 = mode1; |
| 80 | + unsigned mode2 = mode2; |
| 81 | + int cmp; |
| 82 | + |
| 83 | + if (one.size) |
| 84 | + elem1 = tree_entry_extract(&one, &path1, &mode1); |
| 85 | + if (two.size) |
| 86 | + elem2 = tree_entry_extract(&two, &path2, &mode2); |
| 87 | + |
| 88 | + if (!one.size) { |
| 89 | + /* two has more entries */ |
| 90 | + score += score_missing(mode2, path2); |
| 91 | + update_tree_entry(&two); |
| 92 | + continue; |
| 93 | + } |
| 94 | + if (!two.size) { |
| 95 | + /* two lacks this entry */ |
| 96 | + score += score_missing(mode1, path1); |
| 97 | + update_tree_entry(&one); |
| 98 | + continue; |
| 99 | + } |
| 100 | + cmp = base_name_compare(path1, strlen(path1), mode1, |
| 101 | + path2, strlen(path2), mode2); |
| 102 | + if (cmp < 0) { |
| 103 | + /* path1 does not appear in two */ |
| 104 | + score += score_missing(mode1, path1); |
| 105 | + update_tree_entry(&one); |
| 106 | + continue; |
| 107 | + } |
| 108 | + else if (cmp > 0) { |
| 109 | + /* path2 does not appear in one */ |
| 110 | + score += score_missing(mode2, path2); |
| 111 | + update_tree_entry(&two); |
| 112 | + continue; |
| 113 | + } |
| 114 | + else if (hashcmp(elem1, elem2)) |
| 115 | + /* they are different */ |
| 116 | + score += score_differs(mode1, mode2, path1); |
| 117 | + else |
| 118 | + /* same subtree or blob */ |
| 119 | + score += score_matches(mode1, mode2, path1); |
| 120 | + update_tree_entry(&one); |
| 121 | + update_tree_entry(&two); |
| 122 | + } |
| 123 | + free(one_buf); |
| 124 | + free(two_buf); |
| 125 | + return score; |
| 126 | +} |
| 127 | + |
| 128 | +/* |
| 129 | + * Match one itself and its subtrees with two and pick the best match. |
| 130 | + */ |
| 131 | +static void match_trees(const unsigned char *hash1, |
| 132 | + const unsigned char *hash2, |
| 133 | + int *best_score, |
| 134 | + char **best_match, |
| 135 | + char *base, |
| 136 | + int recurse_limit) |
| 137 | +{ |
| 138 | + struct tree_desc one; |
| 139 | + void *one_buf; |
| 140 | + enum object_type type; |
| 141 | + unsigned long size; |
| 142 | + |
| 143 | + one_buf = read_sha1_file(hash1, &type, &size); |
| 144 | + if (!one_buf) |
| 145 | + die("unable to read tree (%s)", sha1_to_hex(hash1)); |
| 146 | + if (type != OBJ_TREE) |
| 147 | + die("%s is not a tree", sha1_to_hex(hash1)); |
| 148 | + init_tree_desc(&one, one_buf, size); |
| 149 | + |
| 150 | + while (one.size) { |
| 151 | + const char *path; |
| 152 | + const unsigned char *elem; |
| 153 | + unsigned mode; |
| 154 | + int score; |
| 155 | + |
| 156 | + elem = tree_entry_extract(&one, &path, &mode); |
| 157 | + if (!S_ISDIR(mode)) |
| 158 | + goto next; |
| 159 | + score = score_trees(elem, hash2); |
| 160 | + if (*best_score < score) { |
| 161 | + char *newpath; |
| 162 | + newpath = xmalloc(strlen(base) + strlen(path) + 1); |
| 163 | + sprintf(newpath, "%s%s", base, path); |
| 164 | + free(*best_match); |
| 165 | + *best_match = newpath; |
| 166 | + *best_score = score; |
| 167 | + } |
| 168 | + if (recurse_limit) { |
| 169 | + char *newbase; |
| 170 | + newbase = xmalloc(strlen(base) + strlen(path) + 2); |
| 171 | + sprintf(newbase, "%s%s/", base, path); |
| 172 | + match_trees(elem, hash2, best_score, best_match, |
| 173 | + newbase, recurse_limit - 1); |
| 174 | + free(newbase); |
| 175 | + } |
| 176 | + |
| 177 | + next: |
| 178 | + update_tree_entry(&one); |
| 179 | + } |
| 180 | + free(one_buf); |
| 181 | +} |
| 182 | + |
| 183 | +/* |
| 184 | + * A tree "hash1" has a subdirectory at "prefix". Come up with a |
| 185 | + * tree object by replacing it with another tree "hash2". |
| 186 | + */ |
| 187 | +static int splice_tree(const unsigned char *hash1, |
| 188 | + char *prefix, |
| 189 | + const unsigned char *hash2, |
| 190 | + unsigned char *result) |
| 191 | +{ |
| 192 | + char *subpath; |
| 193 | + int toplen; |
| 194 | + char *buf; |
| 195 | + unsigned long sz; |
| 196 | + struct tree_desc desc; |
| 197 | + unsigned char *rewrite_here; |
| 198 | + const unsigned char *rewrite_with; |
| 199 | + unsigned char subtree[20]; |
| 200 | + enum object_type type; |
| 201 | + int status; |
| 202 | + |
| 203 | + subpath = strchr(prefix, '/'); |
| 204 | + if (!subpath) |
| 205 | + toplen = strlen(prefix); |
| 206 | + else { |
| 207 | + toplen = subpath - prefix; |
| 208 | + subpath++; |
| 209 | + } |
| 210 | + |
| 211 | + buf = read_sha1_file(hash1, &type, &sz); |
| 212 | + if (!buf) |
| 213 | + die("cannot read tree %s", sha1_to_hex(hash1)); |
| 214 | + init_tree_desc(&desc, buf, sz); |
| 215 | + |
| 216 | + rewrite_here = NULL; |
| 217 | + while (desc.size) { |
| 218 | + const char *name; |
| 219 | + unsigned mode; |
| 220 | + const unsigned char *sha1; |
| 221 | + |
| 222 | + sha1 = tree_entry_extract(&desc, &name, &mode); |
| 223 | + if (strlen(name) == toplen && |
| 224 | + !memcmp(name, prefix, toplen)) { |
| 225 | + if (!S_ISDIR(mode)) |
| 226 | + die("entry %s in tree %s is not a tree", |
| 227 | + name, sha1_to_hex(hash1)); |
| 228 | + rewrite_here = (unsigned char *) sha1; |
| 229 | + break; |
| 230 | + } |
| 231 | + update_tree_entry(&desc); |
| 232 | + } |
| 233 | + if (!rewrite_here) |
| 234 | + die("entry %.*s not found in tree %s", |
| 235 | + toplen, prefix, sha1_to_hex(hash1)); |
| 236 | + if (subpath) { |
| 237 | + status = splice_tree(rewrite_here, subpath, hash2, subtree); |
| 238 | + if (status) |
| 239 | + return status; |
| 240 | + rewrite_with = subtree; |
| 241 | + } |
| 242 | + else |
| 243 | + rewrite_with = hash2; |
| 244 | + hashcpy(rewrite_here, rewrite_with); |
| 245 | + status = write_sha1_file(buf, sz, tree_type, result); |
| 246 | + free(buf); |
| 247 | + return status; |
| 248 | +} |
| 249 | + |
| 250 | +/* |
| 251 | + * We are trying to come up with a merge between one and two that |
| 252 | + * results in a tree shape similar to one. The tree two might |
| 253 | + * correspond to a subtree of one, in which case it needs to be |
| 254 | + * shifted down by prefixing otherwise empty directories. On the |
| 255 | + * other hand, it could cover tree one and we might need to pick a |
| 256 | + * subtree of it. |
| 257 | + */ |
| 258 | +void shift_tree(const unsigned char *hash1, |
| 259 | + const unsigned char *hash2, |
| 260 | + unsigned char *shifted, |
| 261 | + int depth_limit) |
| 262 | +{ |
| 263 | + char *add_prefix; |
| 264 | + char *del_prefix; |
| 265 | + int add_score, del_score; |
| 266 | + |
| 267 | + add_score = del_score = score_trees(hash1, hash2); |
| 268 | + add_prefix = xcalloc(1, 1); |
| 269 | + del_prefix = xcalloc(1, 1); |
| 270 | + |
| 271 | + /* |
| 272 | + * See if one's subtree resembles two; if so we need to prefix |
| 273 | + * two with a few fake trees to match the prefix. |
| 274 | + */ |
| 275 | + match_trees(hash1, hash2, &add_score, &add_prefix, "", depth_limit); |
| 276 | + |
| 277 | + /* |
| 278 | + * See if two's subtree resembles one; if so we need to |
| 279 | + * pick only subtree of two. |
| 280 | + */ |
| 281 | + match_trees(hash2, hash1, &del_score, &del_prefix, "", depth_limit); |
| 282 | + |
| 283 | + /* Assume we do not have to do any shifting */ |
| 284 | + hashcpy(shifted, hash2); |
| 285 | + |
| 286 | + if (add_score < del_score) { |
| 287 | + /* We need to pick a subtree of two */ |
| 288 | + unsigned mode; |
| 289 | + |
| 290 | + if (!*del_prefix) |
| 291 | + return; |
| 292 | + |
| 293 | + if (get_tree_entry(hash2, del_prefix, shifted, &mode)) |
| 294 | + die("cannot find path %s in tree %s", |
| 295 | + del_prefix, sha1_to_hex(hash2)); |
| 296 | + return; |
| 297 | + } |
| 298 | + |
| 299 | + if (!*add_prefix) |
| 300 | + return; |
| 301 | + |
| 302 | + splice_tree(hash1, add_prefix, hash2, shifted); |
| 303 | +} |
| 304 | + |
0 commit comments