Skip to content

Commit 81afdf7

Browse files
newren authored and gitster committed
diffcore-rename: compute dir_rename_guess from dir_rename_counts
dir_rename_counts has a mapping of a mapping, in particular, it has

    old_dir => { new_dir => count }

We want a simple mapping of

    old_dir => new_dir

based on which new_dir had the highest count for a given old_dir. Compute this and store it in dir_rename_guess.

This is the final piece of the puzzle needed to make our guesses at which directory files have been moved to when basenames aren't unique. For the testcases mentioned in commit 557ac03 ("merge-ort: begin performance work; instrument with trace2_region_* calls", 2020-10-28), this change improves the performance as follows:

                      Before                  After
    no-renames:       12.775 s ± 0.062 s     12.596 s ± 0.061 s
    mega-renames:    188.754 s ± 0.284 s    130.465 s ± 0.259 s
    just-one-mega:     5.599 s ± 0.019 s      3.958 s ± 0.010 s

Reviewed-by: Derrick Stolee <[email protected]>
Signed-off-by: Elijah Newren <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 333899e commit 81afdf7

File tree

1 file changed

+41
-4
lines changed

1 file changed

+41
-4
lines changed

diffcore-rename.c

Lines changed: 41 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -389,6 +389,24 @@ static void dirname_munge(char *filename)
389389
*slash = '\0';
390390
}
391391

392+
/*
 * Return the key of the entry in `counts` that has the largest count.
 *
 * `counts` maps a destination directory name to a count.  The returned
 * pointer is the map's own key string, not a copy.
 *
 * Returns NULL when no entry has a count greater than zero, which
 * includes the case where the loop below visits no entries at all.
 * When several entries share the largest count, the one visited first
 * is returned, because a later entry only wins if it is strictly
 * greater.
 */
static const char *get_highest_rename_path(struct strintmap *counts)
393+
{
394+
/* Starting at 0 means only strictly positive counts can be selected. */
int highest_count = 0;
395+
const char *highest_destination_dir = NULL;
396+
struct hashmap_iter iter;
397+
struct strmap_entry *entry;
398+
399+
strintmap_for_each_entry(counts, &iter, entry) {
400+
const char *destination_dir = entry->key;
401+
/*
 * The count is read back from the entry's value slot via an
 * intptr_t cast (presumably strintmap stores the integer directly
 * in that slot -- confirm against strmap.h).
 * NOTE(review): highest_count is an int, so the assignment below
 * narrows from intptr_t.
 */
intptr_t count = (intptr_t)entry->value;
402+
if (count > highest_count) {
403+
highest_count = count;
404+
highest_destination_dir = destination_dir;
405+
}
406+
}
407+
return highest_destination_dir;
408+
}
409+
392410
static void increment_count(struct dir_rename_info *info,
393411
char *old_dir,
394412
char *new_dir)
@@ -512,6 +530,8 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
512530
struct strset *dirs_removed,
513531
struct strmap *dir_rename_count)
514532
{
533+
struct hashmap_iter iter;
534+
struct strmap_entry *entry;
515535
int i;
516536

517537
if (!dirs_removed) {
@@ -558,6 +578,23 @@ static void initialize_dir_rename_info(struct dir_rename_info *info,
558578
rename_dst[i].p->one->path,
559579
rename_dst[i].p->two->path);
560580
}
581+
582+
/*
583+
* Now we collapse
584+
* dir_rename_count: old_directory -> {new_directory -> count}
585+
* down to
586+
* dir_rename_guess: old_directory -> best_new_directory
587+
* where best_new_directory is the one with the highest count.
588+
*/
589+
strmap_for_each_entry(info->dir_rename_count, &iter, entry) {
590+
/* entry->key is source_dir */
591+
struct strintmap *counts = entry->value;
592+
char *best_newdir;
593+
594+
best_newdir = xstrdup(get_highest_rename_path(counts));
595+
strmap_put(&info->dir_rename_guess, entry->key,
596+
best_newdir);
597+
}
561598
}
562599

563600
void partial_clear_dir_rename_count(struct strmap *dir_rename_count)
@@ -682,10 +719,10 @@ static int idx_possible_rename(char *filename, struct dir_rename_info *info)
682719
* rename.
683720
*
684721
* This function, idx_possible_rename(), is only responsible for (4).
685-
* The conditions/steps in (1)-(3) will be handled via setting up
686-
* dir_rename_count and dir_rename_guess in a future
687-
* initialize_dir_rename_info() function. Steps (0) and (5) are
688-
* handled by the caller of this function.
722+
* The conditions/steps in (1)-(3) are handled via setting up
723+
* dir_rename_count and dir_rename_guess in
724+
* initialize_dir_rename_info(). Steps (0) and (5) are handled by
725+
* the caller of this function.
689726
*/
690727
char *old_dir, *new_dir;
691728
struct strbuf new_path = STRBUF_INIT;

0 commit comments

Comments
 (0)