Skip to content

Commit d29bd6d

Browse files
newren authored and gitster committed
merge-ort: add data structures for in-memory caching of rename detection
When there are many renames between the old base of a series of commits and the new base for a series of commits, the sequence of merges employed to transplant those commits (from a cherry-pick or rebase operation) will repeatedly detect the exact same renames. This is wasted effort.

Add data structures which will be used to cache rename detection results, along with the initialization and deallocation of these data structures. Future commits will populate these caches, detect the appropriate circumstances when they can be used, and employ them to avoid re-detecting the same renames repeatedly.

Signed-off-by: Elijah Newren <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent a22099f commit d29bd6d

File tree

1 file changed

+53
-0
lines changed

1 file changed

+53
-0
lines changed

merge-ort.c

Lines changed: 53 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -139,6 +139,48 @@ struct rename_info {
139139
int callback_data_nr, callback_data_alloc;
140140
char *callback_data_traverse_path;
141141

142+
/*
143+
* cached_pairs: Caching of renames and deletions.
144+
*
145+
* These are mappings recording renames and deletions of individual
146+
* files (not directories). They are thus a map from an old
147+
* filename to either NULL (for deletions) or a new filename (for
148+
* renames).
149+
*/
150+
struct strmap cached_pairs[3];
151+
152+
/*
153+
* cached_target_names: just the destinations from cached_pairs
154+
*
155+
* We sometimes want a fast lookup to determine if a given filename
156+
* is one of the destinations in cached_pairs. cached_target_names
157+
* is thus duplicative information, but it provides a fast lookup.
158+
*/
159+
struct strset cached_target_names[3];
160+
161+
/*
162+
* cached_irrelevant: Caching of rename_sources that aren't relevant.
163+
*
164+
* If we try to detect a rename for a source path and succeed, it's
165+
* part of a rename. If we try to detect a rename for a source path
166+
* and fail, then it's a delete. If we do not try to detect a rename
167+
* for a path, then we don't know if it's a rename or a delete. If
168+
* merge-ort doesn't think the path is relevant, then we just won't
169+
* cache anything for that path. But there's a slight problem in
170+
* that merge-ort can think a path is RELEVANT_LOCATION, but due to
171+
* commit 9bd342137e ("diffcore-rename: determine which
172+
* relevant_sources are no longer relevant", 2021-03-13),
173+
* diffcore-rename can downgrade the path to RELEVANT_NO_MORE. To
174+
* avoid excessive calls to diffcore_rename_extended() we still need
175+
* to cache such paths, though we cannot record them as either
176+
* renames or deletes. So we cache them here as a "turned out to be
177+
* irrelevant *for this commit*" as they are often also irrelevant
178+
* for subsequent commits, though we will have to do some extra
179+
* checking to see whether such paths become relevant for rename
180+
* detection when cherry-picking/rebasing subsequent commits.
181+
*/
182+
struct strset cached_irrelevant[3];
183+
142184
/*
143185
* needed_limit: value needed for inexact rename detection to run
144186
*
@@ -381,6 +423,8 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
381423
reinitialize ? strmap_partial_clear : strmap_clear;
382424
void (*strintmap_func)(struct strintmap *) =
383425
reinitialize ? strintmap_partial_clear : strintmap_clear;
426+
void (*strset_func)(struct strset *) =
427+
reinitialize ? strset_partial_clear : strset_clear;
384428

385429
/*
386430
* We marked opti->paths with strdup_strings = 0, so that we
@@ -424,6 +468,9 @@ static void clear_or_reinit_internal_opts(struct merge_options_internal *opti,
424468
strmap_func(&renames->dir_renames[i], 0);
425469

426470
strintmap_func(&renames->relevant_sources[i]);
471+
strset_func(&renames->cached_target_names[i]);
472+
strmap_func(&renames->cached_pairs[i], 1);
473+
strset_func(&renames->cached_irrelevant[i]);
427474
}
428475

429476
if (!reinitialize) {
@@ -3675,6 +3722,12 @@ static void merge_start(struct merge_options *opt, struct merge_result *result)
36753722
NULL, 0);
36763723
strintmap_init_with_options(&renames->relevant_sources[i],
36773724
0, NULL, 0);
3725+
strmap_init_with_options(&renames->cached_pairs[i],
3726+
NULL, 1);
3727+
strset_init_with_options(&renames->cached_irrelevant[i],
3728+
NULL, 1);
3729+
strset_init_with_options(&renames->cached_target_names[i],
3730+
NULL, 0);
36783731
}
36793732

36803733
/*

0 commit comments

Comments
 (0)