Skip to content

Commit 7fe40b8

Browse files
newren authored and gitster committed
merge-recursive: add get_directory_renames()
This populates a set of directory renames for us. The set of directory renames is not yet used, but will be in subsequent commits.

Note that the use of a string_list for possible_new_dirs in the new dir_rename_entry struct implies an O(n^2) algorithm; however, in practice I expect the number of distinct directories that files were renamed into from a single original directory to be O(1). My guess is that n has a mode of 1 and a mean of less than 2, so, for now, string_list seems good enough for possible_new_dirs.

Reviewed-by: Stefan Beller <[email protected]>
Signed-off-by: Elijah Newren <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent ffc16c4 commit 7fe40b8

File tree

2 files changed

+239
-3
lines changed

2 files changed

+239
-3
lines changed

merge-recursive.c

Lines changed: 221 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -49,6 +49,44 @@ static unsigned int path_hash(const char *path)
4949
return ignore_case ? strihash(path) : strhash(path);
5050
}
5151

52+
static struct dir_rename_entry *dir_rename_find_entry(struct hashmap *hashmap,
53+
char *dir)
54+
{
55+
struct dir_rename_entry key;
56+
57+
if (dir == NULL)
58+
return NULL;
59+
hashmap_entry_init(&key, strhash(dir));
60+
key.dir = dir;
61+
return hashmap_get(hashmap, &key, NULL);
62+
}
63+
64+
static int dir_rename_cmp(const void *unused_cmp_data,
65+
const void *entry,
66+
const void *entry_or_key,
67+
const void *unused_keydata)
68+
{
69+
const struct dir_rename_entry *e1 = entry;
70+
const struct dir_rename_entry *e2 = entry_or_key;
71+
72+
return strcmp(e1->dir, e2->dir);
73+
}
74+
75+
static void dir_rename_init(struct hashmap *map)
76+
{
77+
hashmap_init(map, dir_rename_cmp, NULL, 0);
78+
}
79+
80+
static void dir_rename_entry_init(struct dir_rename_entry *entry,
81+
char *directory)
82+
{
83+
hashmap_entry_init(entry, strhash(directory));
84+
entry->dir = directory;
85+
entry->non_unique_new_dir = 0;
86+
strbuf_init(&entry->new_dir, 0);
87+
string_list_init(&entry->possible_new_dirs, 0);
88+
}
89+
5290
static void flush_output(struct merge_options *o)
5391
{
5492
if (o->buffer_output < 2 && o->obuf.len) {
@@ -1357,6 +1395,169 @@ static struct diff_queue_struct *get_diffpairs(struct merge_options *o,
13571395
return ret;
13581396
}
13591397

1398+
/*
 * Work out which leading directory differs between old_path and new_path.
 *
 * For
 *    "a/b/c/d/e/foo.c" -> "a/b/some/thing/else/e/foo.c"
 * the "e/foo.c" part is the same, we just want to know that
 *    "a/b/c/d" was renamed to "a/b/some/thing/else"
 * so, for this example, this function returns "a/b/c/d" in
 * *old_dir and "a/b/some/thing/else" in *new_dir.
 *
 * Also, if the basename of the file changed, we don't care.  We
 * want to know which portion of the directory, if any, changed.
 *
 * On return, *old_dir and *new_dir are either both NULL (one of the paths
 * has no directory component, or the directory portions are identical) or
 * both point to newly xstrndup()'ed strings without a trailing '/'.
 */
static void get_renamed_dir_portion(const char *old_path, const char *new_path,
				    char **old_dir, char **new_dir)
{
	char *end_of_old, *end_of_new;
	size_t old_len, new_len;

	*old_dir = NULL;
	*new_dir = NULL;

	/* Start at the '/' separating the directory from the basename. */
	end_of_old = strrchr(old_path, '/');
	end_of_new = strrchr(new_path, '/');
	if (end_of_old == NULL || end_of_new == NULL)
		return;

	/*
	 * The loop below pre-decrements both cursors, so a path whose only
	 * '/' is its very first character would make it step in front of
	 * the string.  Such a path has no named directory to compare.
	 */
	if (end_of_old == old_path || end_of_new == new_path)
		return;

	/* Walk backwards over the common trailing part of the directories. */
	while (*--end_of_new == *--end_of_old &&
	       end_of_old != old_path &&
	       end_of_new != new_path)
		; /* Do nothing; all in the while loop */

	/*
	 * Each cursor now rests on the first non-matching character (or on
	 * the start of its path), which lies inside or directly in front of
	 * the directory component that represents the rename.  Find the '/'
	 * that terminates that component.
	 *
	 * A cursor that sits on a '/' must be advanced first, otherwise
	 * strchr() would just return that same '/'.  A cursor that does not
	 * sit on a '/' can be advanced by one character without changing
	 * the result.  So advance both unconditionally; since each cursor
	 * is strictly before the final '/' of its path, strchr() always
	 * finds a match.
	 */
	end_of_old = strchr(end_of_old + 1, '/');
	end_of_new = strchr(end_of_new + 1, '/');

	/*
	 * It may have been the case that old_path and new_path were the same
	 * directory all along.  Don't claim a rename if they're the same.
	 */
	old_len = (size_t)(end_of_old - old_path);
	new_len = (size_t)(end_of_new - new_path);

	if (old_len != new_len || strncmp(old_path, new_path, old_len)) {
		*old_dir = xstrndup(old_path, old_len);
		*new_dir = xstrndup(new_path, new_len);
	}
}
1452+
1453+
static struct hashmap *get_directory_renames(struct diff_queue_struct *pairs,
1454+
struct tree *tree)
1455+
{
1456+
struct hashmap *dir_renames;
1457+
struct hashmap_iter iter;
1458+
struct dir_rename_entry *entry;
1459+
int i;
1460+
1461+
/*
1462+
* Typically, we think of a directory rename as all files from a
1463+
* certain directory being moved to a target directory. However,
1464+
* what if someone first moved two files from the original
1465+
* directory in one commit, and then renamed the directory
1466+
* somewhere else in a later commit? At merge time, we just know
1467+
* that files from the original directory went to two different
1468+
* places, and that the bulk of them ended up in the same place.
1469+
* We want each directory rename to represent where the bulk of the
1470+
* files from that directory end up; this function exists to find
1471+
* where the bulk of the files went.
1472+
*
1473+
* The first loop below simply iterates through the list of file
1474+
* renames, finding out how often each directory rename pair
1475+
* possibility occurs.
1476+
*/
1477+
dir_renames = xmalloc(sizeof(*dir_renames));
1478+
dir_rename_init(dir_renames);
1479+
for (i = 0; i < pairs->nr; ++i) {
1480+
struct string_list_item *item;
1481+
int *count;
1482+
struct diff_filepair *pair = pairs->queue[i];
1483+
char *old_dir, *new_dir;
1484+
1485+
/* File not part of directory rename if it wasn't renamed */
1486+
if (pair->status != 'R')
1487+
continue;
1488+
1489+
get_renamed_dir_portion(pair->one->path, pair->two->path,
1490+
&old_dir, &new_dir);
1491+
if (!old_dir)
1492+
/* Directory didn't change at all; ignore this one. */
1493+
continue;
1494+
1495+
entry = dir_rename_find_entry(dir_renames, old_dir);
1496+
if (!entry) {
1497+
entry = xmalloc(sizeof(*entry));
1498+
dir_rename_entry_init(entry, old_dir);
1499+
hashmap_put(dir_renames, entry);
1500+
} else {
1501+
free(old_dir);
1502+
}
1503+
item = string_list_lookup(&entry->possible_new_dirs, new_dir);
1504+
if (!item) {
1505+
item = string_list_insert(&entry->possible_new_dirs,
1506+
new_dir);
1507+
item->util = xcalloc(1, sizeof(int));
1508+
} else {
1509+
free(new_dir);
1510+
}
1511+
count = item->util;
1512+
*count += 1;
1513+
}
1514+
1515+
/*
1516+
* For each directory with files moved out of it, we find out which
1517+
* target directory received the most files so we can declare it to
1518+
* be the "winning" target location for the directory rename. This
1519+
* winner gets recorded in new_dir. If there is no winner
1520+
* (multiple target directories received the same number of files),
1521+
* we set non_unique_new_dir. Once we've determined the winner (or
1522+
* that there is no winner), we no longer need possible_new_dirs.
1523+
*/
1524+
hashmap_iter_init(dir_renames, &iter);
1525+
while ((entry = hashmap_iter_next(&iter))) {
1526+
int max = 0;
1527+
int bad_max = 0;
1528+
char *best = NULL;
1529+
1530+
for (i = 0; i < entry->possible_new_dirs.nr; i++) {
1531+
int *count = entry->possible_new_dirs.items[i].util;
1532+
1533+
if (*count == max)
1534+
bad_max = max;
1535+
else if (*count > max) {
1536+
max = *count;
1537+
best = entry->possible_new_dirs.items[i].string;
1538+
}
1539+
}
1540+
if (bad_max == max)
1541+
entry->non_unique_new_dir = 1;
1542+
else {
1543+
assert(entry->new_dir.len == 0);
1544+
strbuf_addstr(&entry->new_dir, best);
1545+
}
1546+
/*
1547+
* The relevant directory sub-portion of the original full
1548+
* filepaths were xstrndup'ed before inserting into
1549+
* possible_new_dirs, and instead of manually iterating the
1550+
* list and free'ing each, just lie and tell
1551+
* possible_new_dirs that it did the strdup'ing so that it
1552+
* will free them for us.
1553+
*/
1554+
entry->possible_new_dirs.strdup_strings = 1;
1555+
string_list_clear(&entry->possible_new_dirs, 1);
1556+
}
1557+
1558+
return dir_renames;
1559+
}
1560+
13601561
/*
13611562
* Get information of all renames which occurred in 'pairs', making use of
13621563
* any implicit directory renames inferred from the other side of history.
@@ -1668,8 +1869,21 @@ struct rename_info {
16681869
struct string_list *merge_renames;
16691870
};
16701871

1671-
static void initial_cleanup_rename(struct diff_queue_struct *pairs)
1872+
static void initial_cleanup_rename(struct diff_queue_struct *pairs,
1873+
struct hashmap *dir_renames)
16721874
{
1875+
struct hashmap_iter iter;
1876+
struct dir_rename_entry *e;
1877+
1878+
hashmap_iter_init(dir_renames, &iter);
1879+
while ((e = hashmap_iter_next(&iter))) {
1880+
free(e->dir);
1881+
strbuf_release(&e->new_dir);
1882+
/* possible_new_dirs already cleared in get_directory_renames */
1883+
}
1884+
hashmap_free(dir_renames, 1);
1885+
free(dir_renames);
1886+
16731887
free(pairs->queue);
16741888
free(pairs);
16751889
}
@@ -1682,6 +1896,7 @@ static int handle_renames(struct merge_options *o,
16821896
struct rename_info *ri)
16831897
{
16841898
struct diff_queue_struct *head_pairs, *merge_pairs;
1899+
struct hashmap *dir_re_head, *dir_re_merge;
16851900
int clean;
16861901

16871902
ri->head_renames = NULL;
@@ -1693,6 +1908,9 @@ static int handle_renames(struct merge_options *o,
16931908
head_pairs = get_diffpairs(o, common, head);
16941909
merge_pairs = get_diffpairs(o, common, merge);
16951910

1911+
dir_re_head = get_directory_renames(head_pairs, head);
1912+
dir_re_merge = get_directory_renames(merge_pairs, merge);
1913+
16961914
ri->head_renames = get_renames(o, head_pairs, head,
16971915
common, head, merge, entries);
16981916
ri->merge_renames = get_renames(o, merge_pairs, merge,
@@ -1704,8 +1922,8 @@ static int handle_renames(struct merge_options *o,
17041922
* data structures are still needed and referenced in
17051923
* process_entry(). But there are a few things we can free now.
17061924
*/
1707-
initial_cleanup_rename(head_pairs);
1708-
initial_cleanup_rename(merge_pairs);
1925+
initial_cleanup_rename(head_pairs, dir_re_head);
1926+
initial_cleanup_rename(merge_pairs, dir_re_merge);
17091927

17101928
return clean;
17111929
}

merge-recursive.h

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,24 @@ struct merge_options {
2929
struct string_list df_conflict_file_set;
3030
};
3131

32+
/*
33+
* For dir_rename_entry, directory names are stored as a full path from the
34+
* toplevel of the repository and do not include a trailing '/'. Also:
35+
*
36+
* dir: original name of directory being renamed
37+
* non_unique_new_dir: if true, could not determine new_dir
38+
* new_dir: final name of directory being renamed
39+
* possible_new_dirs: temporary used to help determine new_dir; see comments
40+
* in get_directory_renames() for details
41+
*/
42+
struct dir_rename_entry {
43+
struct hashmap_entry ent; /* must be the first member! */
44+
char *dir;
45+
unsigned non_unique_new_dir:1;
46+
struct strbuf new_dir;
47+
struct string_list possible_new_dirs;
48+
};
49+
3250
/* merge_trees() but with recursive ancestor consolidation */
3351
int merge_recursive(struct merge_options *o,
3452
struct commit *h1,

0 commit comments

Comments
 (0)