Skip to content

Commit fc65b00

Browse files
Kevin Willford authored and gitster committed
merge-recursive: change current file dir string_lists to hashmap
The code was using two string_lists, one for the directories and one for the files. The code never checks the lists independently, so we should be able to use only one list. The string_list is also O(log n) for lookup and insertion. Switching to a hashmap gives O(1), which will save some time when there are millions of paths to be checked. Signed-off-by: Kevin Willford <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent ef9c4dc commit fc65b00

File tree

2 files changed

+46
-13
lines changed

2 files changed

+46
-13
lines changed

merge-recursive.c

Lines changed: 45 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,31 @@
2424
#include "dir.h"
2525
#include "submodule.h"
2626

27+
/*
 * Hashmap entry that carries a path string inline: `e` is the embedded
 * hashmap_entry header and `path` is a trailing FLEX_ARRAY member holding
 * the path bytes (instances are allocated with FLEX_ALLOC_MEM).
 */
struct path_hashmap_entry {
28+
struct hashmap_entry e;
29+
char path[FLEX_ARRAY];
30+
};
31+
32+
/*
 * Comparison callback for the path hashmap (passed to hashmap_init).
 *
 * Compares the path stored in `entry` against `keydata` when it is
 * non-NULL, otherwise against the path stored in `entry_or_key`.
 * `cmp_data` is not used. The comparison is strcasecmp() when the
 * global `ignore_case` is set and strcmp() otherwise, so the result is
 * 0 when the two paths are considered equal.
 */
static int path_hashmap_cmp(const void *cmp_data,
33+
const void *entry,
34+
const void *entry_or_key,
35+
const void *keydata)
36+
{
37+
const struct path_hashmap_entry *a = entry;
38+
const struct path_hashmap_entry *b = entry_or_key;
39+
const char *key = keydata;
40+
41+
/* An explicit key string, when supplied, takes precedence over b->path. */
if (ignore_case)
42+
return strcasecmp(a->path, key ? key : b->path);
43+
else
44+
return strcmp(a->path, key ? key : b->path);
45+
}
46+
47+
/*
 * Hash a path for the path hashmap: strihash() when the global
 * `ignore_case` is set, strhash() otherwise.
 * NOTE(review): strihash is presumably the case-insensitive variant, which
 * would keep hashing consistent with path_hashmap_cmp -- confirm in hashmap.h.
 */
static unsigned int path_hash(const char *path)
48+
{
49+
return ignore_case ? strihash(path) : strhash(path);
50+
}
51+
2752
static void flush_output(struct merge_options *o)
2853
{
2954
if (o->buffer_output < 2 && o->obuf.len) {
@@ -314,15 +339,15 @@ static int save_files_dirs(const unsigned char *sha1,
314339
struct strbuf *base, const char *path,
315340
unsigned int mode, int stage, void *context)
316341
{
342+
struct path_hashmap_entry *entry;
317343
int baselen = base->len;
318344
struct merge_options *o = context;
319345

320346
strbuf_addstr(base, path);
321347

322-
if (S_ISDIR(mode))
323-
string_list_insert(&o->current_directory_set, base->buf);
324-
else
325-
string_list_insert(&o->current_file_set, base->buf);
348+
FLEX_ALLOC_MEM(entry, path, base->buf, base->len);
349+
hashmap_entry_init(entry, path_hash(entry->path));
350+
hashmap_add(&o->current_file_dir_set, entry);
326351

327352
strbuf_setlen(base, baselen);
328353
return (S_ISDIR(mode) ? READ_TREE_RECURSIVE : 0);
@@ -642,6 +667,7 @@ static void add_flattened_path(struct strbuf *out, const char *s)
642667

643668
static char *unique_path(struct merge_options *o, const char *path, const char *branch)
644669
{
670+
struct path_hashmap_entry *entry;
645671
struct strbuf newpath = STRBUF_INIT;
646672
int suffix = 0;
647673
size_t base_len;
@@ -650,14 +676,16 @@ static char *unique_path(struct merge_options *o, const char *path, const char *
650676
add_flattened_path(&newpath, branch);
651677

652678
base_len = newpath.len;
653-
while (string_list_has_string(&o->current_file_set, newpath.buf) ||
654-
string_list_has_string(&o->current_directory_set, newpath.buf) ||
679+
while (hashmap_get_from_hash(&o->current_file_dir_set,
680+
path_hash(newpath.buf), newpath.buf) ||
655681
(!o->call_depth && file_exists(newpath.buf))) {
656682
strbuf_setlen(&newpath, base_len);
657683
strbuf_addf(&newpath, "_%d", suffix++);
658684
}
659685

660-
string_list_insert(&o->current_file_set, newpath.buf);
686+
FLEX_ALLOC_MEM(entry, path, newpath.buf, newpath.len);
687+
hashmap_entry_init(entry, path_hash(entry->path));
688+
hashmap_add(&o->current_file_dir_set, entry);
661689
return strbuf_detach(&newpath, NULL);
662690
}
663691

@@ -1941,8 +1969,14 @@ int merge_trees(struct merge_options *o,
19411969
if (unmerged_cache()) {
19421970
struct string_list *entries, *re_head, *re_merge;
19431971
int i;
1944-
string_list_clear(&o->current_file_set, 1);
1945-
string_list_clear(&o->current_directory_set, 1);
1972+
/*
1973+
* Only need the hashmap while processing entries, so
1974+
* initialize it here and free it when we are done running
1975+
* through the entries. Keeping it in the merge_options as
1976+
* opposed to declaring a local hashmap is for convenience
1977+
* so that we don't have to pass it around.
1978+
*/
1979+
hashmap_init(&o->current_file_dir_set, path_hashmap_cmp, NULL, 512);
19461980
get_files_dirs(o, head);
19471981
get_files_dirs(o, merge);
19481982

@@ -1978,6 +2012,8 @@ int merge_trees(struct merge_options *o,
19782012
string_list_clear(re_head, 0);
19792013
string_list_clear(entries, 1);
19802014

2015+
hashmap_free(&o->current_file_dir_set, 1);
2016+
19812017
free(re_merge);
19822018
free(re_head);
19832019
free(entries);
@@ -2179,8 +2215,6 @@ void init_merge_options(struct merge_options *o)
21792215
if (o->verbosity >= 5)
21802216
o->buffer_output = 0;
21812217
strbuf_init(&o->obuf, 0);
2182-
string_list_init(&o->current_file_set, 1);
2183-
string_list_init(&o->current_directory_set, 1);
21842218
string_list_init(&o->df_conflict_file_set, 1);
21852219
}
21862220

merge-recursive.h

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,8 +25,7 @@ struct merge_options {
2525
int show_rename_progress;
2626
int call_depth;
2727
struct strbuf obuf;
28-
struct string_list current_file_set;
29-
struct string_list current_directory_set;
28+
struct hashmap current_file_dir_set;
3029
struct string_list df_conflict_file_set;
3130
};
3231

0 commit comments

Comments
 (0)