Skip to content

Commit fa0e936

Browse files
newren authored and gitster committed
diffcore-rename: use a mem_pool for exact rename detection's hashmap
Exact rename detection, via insert_file_table(), uses a hashmap to store files by oid. Use a mem_pool for the hashmap entries so these can all be allocated and deallocated together.

For the testcases mentioned in commit 557ac03 ("merge-ort: begin performance work; instrument with trace2_region_* calls", 2020-10-28), this change improves the performance as follows:

                        Before                 After
    no-renames:      204.2 ms ± 3.0 ms      202.5 ms ± 3.2 ms
    mega-renames:    1.076 s ± 0.015 s      1.072 s ± 0.012 s
    just-one-mega:   364.1 ms ± 7.0 ms      357.3 ms ± 3.9 ms

Signed-off-by: Elijah Newren <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent 7afc0b0 commit fa0e936

File tree

1 file changed

+16
-6
lines changed

1 file changed

+16
-6
lines changed

diffcore-rename.c

Lines changed: 16 additions & 6 deletions
Original file line number | Diff line number | Diff line change
@@ -317,10 +317,11 @@ static int find_identical_files(struct hashmap *srcs,
317317
}
318318

319319
static void insert_file_table(struct repository *r,
320+
struct mem_pool *pool,
320321
struct hashmap *table, int index,
321322
struct diff_filespec *filespec)
322323
{
323-
struct file_similarity *entry = xmalloc(sizeof(*entry));
324+
struct file_similarity *entry = mem_pool_alloc(pool, sizeof(*entry));
324325

325326
entry->index = index;
326327
entry->filespec = filespec;
@@ -336,7 +337,8 @@ static void insert_file_table(struct repository *r,
336337
* and then during the second round we try to match
337338
* cache-dirty entries as well.
338339
*/
339-
static int find_exact_renames(struct diff_options *options)
340+
static int find_exact_renames(struct diff_options *options,
341+
struct mem_pool *pool)
340342
{
341343
int i, renames = 0;
342344
struct hashmap file_table;
@@ -346,16 +348,16 @@ static int find_exact_renames(struct diff_options *options)
346348
*/
347349
hashmap_init(&file_table, NULL, NULL, rename_src_nr);
348350
for (i = rename_src_nr-1; i >= 0; i--)
349-
insert_file_table(options->repo,
351+
insert_file_table(options->repo, pool,
350352
&file_table, i,
351353
rename_src[i].p->one);
352354

353355
/* Walk the destinations and find best source match */
354356
for (i = 0; i < rename_dst_nr; i++)
355357
renames += find_identical_files(&file_table, i, options);
356358

357-
/* Free the hash data structure and entries */
358-
hashmap_clear_and_free(&file_table, struct file_similarity, entry);
359+
/* Free the hash data structure (entries will be freed with the pool) */
360+
hashmap_clear(&file_table);
359361

360362
return renames;
361363
}
@@ -1341,6 +1343,7 @@ void diffcore_rename_extended(struct diff_options *options,
13411343
int num_destinations, dst_cnt;
13421344
int num_sources, want_copies;
13431345
struct progress *progress = NULL;
1346+
struct mem_pool local_pool;
13441347
struct dir_rename_info info;
13451348
struct diff_populate_filespec_options dpf_options = {
13461349
.check_binary = 0,
@@ -1409,11 +1412,18 @@ void diffcore_rename_extended(struct diff_options *options,
14091412
goto cleanup; /* nothing to do */
14101413

14111414
trace2_region_enter("diff", "exact renames", options->repo);
1415+
mem_pool_init(&local_pool, 32*1024);
14121416
/*
14131417
* We really want to cull the candidates list early
14141418
* with cheap tests in order to avoid doing deltas.
14151419
*/
1416-
rename_count = find_exact_renames(options);
1420+
rename_count = find_exact_renames(options, &local_pool);
1421+
/*
1422+
* Discard local_pool immediately instead of at "cleanup:" in order
1423+
* to reduce maximum memory usage; inexact rename detection uses up
1424+
* a fair amount of memory, and mem_pools can too.
1425+
*/
1426+
mem_pool_discard(&local_pool, 0);
14171427
trace2_region_leave("diff", "exact renames", options->repo);
14181428

14191429
/* Did we only want exact renames? */

0 commit comments

Comments
 (0)