Skip to content

Commit 7bee6c1

Browse files
newrengitster
authored andcommitted
merge-ort: avoid recursing into directories when we don't need to
This combines the work of the last several patches, and implements the conditions when we don't need to recurse into directories. It's perhaps easiest to see the logic by separating the fact that a directory might have both rename sources and rename destinations: * rename sources: only files present in the merge base can serve as rename sources, and only when one side deletes that file. When the tree on one side matches the merge base, that means every file within the subtree matches the merge base. This means that the skip-irrelevant-rename-detection optimization from before kicks in and we don't need any of these files as rename sources. * rename destinations: the tree that does not match the merge base might have newly added and hence unmatched destination files. This is what usually prevents us from doing trivial directory resolutions in the merge machinery. However, the fact that we have deferred recursing into this directory until the end means we know whether there are any unmatched relevant potential rename sources elsewhere in this merge. If there are no unmatched such relevant sources anywhere, then there is no need to look for unmatched potential rename destinations to match them with. This informs our algorithm: * Search through relevant_sources; if we have entries, they better all be reflected in cached_pairs or cached_irrelevant, otherwise they represent an unmatched potential rename source (causing the optimization to be disallowed). * For any relevant_source represented in cached_pairs, we do need to to make sure to get the destination for each source, meaning we need to recurse into any ancestor directories of those destinations. * Once we've recursed into all the rename destinations for any relevant_sources in cached_pairs, we can then do the trivial directory resolution for the remaining directories. For the testcases mentioned in commit 557ac03 ("merge-ort: begin performance work; instrument with trace2_region_* calls", 2020-10-28), this change improves the performance as follows: Before After no-renames: 5.235 s ± 0.042 s 205.1 ms ± 3.8 ms mega-renames: 9.419 s ± 0.107 s 1.564 s ± 0.010 s just-one-mega: 480.1 ms ± 3.9 ms 479.5 ms ± 3.9 ms Signed-off-by: Elijah Newren <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 5e1ca57 commit 7bee6c1

File tree

1 file changed

+99
-3
lines changed

1 file changed

+99
-3
lines changed

merge-ort.c

Lines changed: 99 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1230,6 +1230,18 @@ static int collect_merge_info_callback(int n,
12301230
return mask;
12311231
}
12321232

1233+
static void resolve_trivial_directory_merge(struct conflict_info *ci, int side)
1234+
{
1235+
VERIFY_CI(ci);
1236+
assert((side == 1 && ci->match_mask == 5) ||
1237+
(side == 2 && ci->match_mask == 3));
1238+
oidcpy(&ci->merged.result.oid, &ci->stages[side].oid);
1239+
ci->merged.result.mode = ci->stages[side].mode;
1240+
ci->merged.is_null = is_null_oid(&ci->stages[side].oid);
1241+
ci->match_mask = 0;
1242+
ci->merged.clean = 1; /* (ci->filemask == 0); */
1243+
}
1244+
12331245
static int handle_deferred_entries(struct merge_options *opt,
12341246
struct traverse_info *info)
12351247
{
@@ -1239,9 +1251,72 @@ static int handle_deferred_entries(struct merge_options *opt,
12391251
int side, ret = 0;
12401252

12411253
for (side = MERGE_SIDE1; side <= MERGE_SIDE2; side++) {
1242-
renames->deferred[side].trivial_merges_okay = 0;
1243-
strintmap_for_each_entry(&renames->deferred[side].possible_trivial_merges,
1244-
&iter, entry) {
1254+
unsigned optimization_okay = 1;
1255+
struct strintmap copy;
1256+
1257+
/* Loop over the set of paths we need to know rename info for */
1258+
strset_for_each_entry(&renames->relevant_sources[side],
1259+
&iter, entry) {
1260+
char *rename_target, *dir, *dir_marker;
1261+
struct strmap_entry *e;
1262+
1263+
/*
1264+
* If we don't know delete/rename info for this path,
1265+
* then we need to recurse into all trees to get all
1266+
* adds to make sure we have it.
1267+
*/
1268+
if (strset_contains(&renames->cached_irrelevant[side],
1269+
entry->key))
1270+
continue;
1271+
e = strmap_get_entry(&renames->cached_pairs[side],
1272+
entry->key);
1273+
if (!e) {
1274+
optimization_okay = 0;
1275+
break;
1276+
}
1277+
1278+
/* If this is a delete, we have enough info already */
1279+
rename_target = e->value;
1280+
if (!rename_target)
1281+
continue;
1282+
1283+
/* If we already walked the rename target, we're good */
1284+
if (strmap_contains(&opt->priv->paths, rename_target))
1285+
continue;
1286+
1287+
/*
1288+
* Otherwise, we need to get a list of directories that
1289+
* will need to be recursed into to get this
1290+
* rename_target.
1291+
*/
1292+
dir = xstrdup(rename_target);
1293+
while ((dir_marker = strrchr(dir, '/'))) {
1294+
*dir_marker = '\0';
1295+
if (strset_contains(&renames->deferred[side].target_dirs,
1296+
dir))
1297+
break;
1298+
strset_add(&renames->deferred[side].target_dirs,
1299+
dir);
1300+
}
1301+
free(dir);
1302+
}
1303+
renames->deferred[side].trivial_merges_okay = optimization_okay;
1304+
/*
1305+
* We need to recurse into any directories in
1306+
* possible_trivial_merges[side] found in target_dirs[side].
1307+
* But when we recurse, we may need to queue up some of the
1308+
* subdirectories for possible_trivial_merges[side]. Since
1309+
* we can't safely iterate through a hashmap while also adding
1310+
* entries, move the entries into 'copy', iterate over 'copy',
1311+
* and then we'll also iterate anything added into
1312+
* possible_trivial_merges[side] once this loop is done.
1313+
*/
1314+
copy = renames->deferred[side].possible_trivial_merges;
1315+
strintmap_init_with_options(&renames->deferred[side].possible_trivial_merges,
1316+
0,
1317+
NULL,
1318+
0);
1319+
strintmap_for_each_entry(&copy, &iter, entry) {
12451320
const char *path = entry->key;
12461321
unsigned dir_rename_mask = (intptr_t)entry->value;
12471322
struct conflict_info *ci;
@@ -1254,6 +1329,13 @@ static int handle_deferred_entries(struct merge_options *opt,
12541329
VERIFY_CI(ci);
12551330
dirmask = ci->dirmask;
12561331

1332+
if (optimization_okay &&
1333+
!strset_contains(&renames->deferred[side].target_dirs,
1334+
path)) {
1335+
resolve_trivial_directory_merge(ci, side);
1336+
continue;
1337+
}
1338+
12571339
info->name = path;
12581340
info->namelen = strlen(path);
12591341
info->pathlen = info->namelen + 1;
@@ -1289,6 +1371,20 @@ static int handle_deferred_entries(struct merge_options *opt,
12891371
if (ret < 0)
12901372
return ret;
12911373
}
1374+
strintmap_clear(&copy);
1375+
strintmap_for_each_entry(&renames->deferred[side].possible_trivial_merges,
1376+
&iter, entry) {
1377+
const char *path = entry->key;
1378+
struct conflict_info *ci;
1379+
1380+
ci = strmap_get(&opt->priv->paths, path);
1381+
VERIFY_CI(ci);
1382+
1383+
assert(renames->deferred[side].trivial_merges_okay &&
1384+
!strset_contains(&renames->deferred[side].target_dirs,
1385+
path));
1386+
resolve_trivial_directory_merge(ci, side);
1387+
}
12921388
}
12931389
return ret;
12941390
}

0 commit comments

Comments
 (0)