Skip to content

Commit 72962e8

Browse files
phillipwoodgitster
authored andcommitted
diff --color-moved: intern strings
Taking inspiration from xdl_classify_record() assign an id to each addition and deletion such that lines that match for the current --color-moved-ws mode share the same unique id. This reduces the number of hash lookups a little (calculating the ids still involves one hash lookup per line) but the main benefit is that when growing blocks of potentially moved lines we can replace string comparisons which involve chasing a pointer with a simple integer comparison. On a large diff this commit reduces the time to run 'diff --color-moved' by 37% compared to the previous commit and 31% compared to master, for 'diff --color-moved-ws=allow-indentation-change' the reduction is 28% compared to the previous commit and 96% compared to master. There is little change in the performance of 'git log --patch' as the diffs are smaller. Test HEAD^ HEAD --------------------------------------------------------------------------------------------------------------- 4002.1: diff --no-color-moved --no-color-moved-ws large change 0.38(0.33+0.05) 0.38(0.33+0.05) +0.0% 4002.2: diff --color-moved --no-color-moved-ws large change 0.88(0.81+0.06) 0.55(0.50+0.04) -37.5% 4002.3: diff --color-moved-ws=allow-indentation-change large change 0.85(0.79+0.06) 0.61(0.54+0.06) -28.2% 4002.4: log --no-color-moved --no-color-moved-ws 1.16(1.07+0.08) 1.15(1.09+0.05) -0.9% 4002.5: log --color-moved --no-color-moved-ws 1.31(1.22+0.08) 1.29(1.19+0.09) -1.5% 4002.6: log --color-moved-ws=allow-indentation-change 1.32(1.24+0.08) 1.31(1.18+0.13) -0.8% Test master HEAD --------------------------------------------------------------------------------------------------------------- 4002.1: diff --no-color-moved --no-color-moved-ws large change 0.38 (0.33+0.05) 0.38(0.33+0.05) +0.0% 4002.2: diff --color-moved --no-color-moved-ws large change 0.80 (0.75+0.04) 0.55(0.50+0.04) -31.2% 4002.3: diff --color-moved-ws=allow-indentation-change large change 14.20(14.15+0.05) 0.61(0.54+0.06) -95.7% 4002.4: log --no-color-moved --no-color-moved-ws 1.15 (1.05+0.09) 1.15(1.09+0.05) +0.0% 4002.5: log --color-moved --no-color-moved-ws 1.30 (1.19+0.11) 1.29(1.19+0.09) -0.8% 4002.6: log --color-moved-ws=allow-indentation-change 1.70 (1.63+0.06) 1.31(1.18+0.13) -22.9% Signed-off-by: Phillip Wood <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent b4a5c5c commit 72962e8

File tree

1 file changed

+96
-78
lines changed

1 file changed

+96
-78
lines changed

diff.c

Lines changed: 96 additions & 78 deletions
Original file line numberDiff line numberDiff line change
@@ -18,6 +18,7 @@
1818
#include "submodule-config.h"
1919
#include "submodule.h"
2020
#include "hashmap.h"
21+
#include "mem-pool.h"
2122
#include "ll-merge.h"
2223
#include "string-list.h"
2324
#include "strvec.h"
@@ -772,6 +773,7 @@ struct emitted_diff_symbol {
772773
int flags;
773774
int indent_off; /* Offset to first non-whitespace character */
774775
int indent_width; /* The visual width of the indentation */
776+
unsigned id;
775777
enum diff_symbol s;
776778
};
777779
#define EMITTED_DIFF_SYMBOL_INIT {NULL}
@@ -797,9 +799,9 @@ static void append_emitted_diff_symbol(struct diff_options *o,
797799
}
798800

799801
struct moved_entry {
800-
struct hashmap_entry ent;
801802
const struct emitted_diff_symbol *es;
802803
struct moved_entry *next_line;
804+
struct moved_entry *next_match;
803805
};
804806

805807
struct moved_block {
@@ -865,24 +867,24 @@ static int cmp_in_block_with_wsd(const struct moved_entry *cur,
865867
const struct emitted_diff_symbol *l,
866868
struct moved_block *pmb)
867869
{
868-
int al = cur->es->len, bl = l->len;
869-
const char *a = cur->es->line,
870-
*b = l->line;
871-
int a_off = cur->es->indent_off,
872-
a_width = cur->es->indent_width,
873-
b_off = l->indent_off,
874-
b_width = l->indent_width;
870+
int a_width = cur->es->indent_width, b_width = l->indent_width;
875871
int delta;
876872

877-
/* If 'l' and 'cur' are both blank then they match. */
878-
if (a_width == INDENT_BLANKLINE && b_width == INDENT_BLANKLINE)
873+
/* The text of each line must match */
874+
if (cur->es->id != l->id)
875+
return 1;
876+
877+
/*
878+
* If 'l' and 'cur' are both blank then we don't need to check the
879+
* indent. We only need to check cur as we know the strings match.
880+
* */
881+
if (a_width == INDENT_BLANKLINE)
879882
return 0;
880883

881884
/*
882885
* The indent changes of the block are known and stored in pmb->wsd;
883886
* however we need to check if the indent changes of the current line
884-
* match those of the current block and that the text of 'l' and 'cur'
885-
* after the indentation match.
887+
* match those of the current block.
886888
*/
887889
delta = b_width - a_width;
888890

@@ -893,78 +895,108 @@ static int cmp_in_block_with_wsd(const struct moved_entry *cur,
893895
if (pmb->wsd == INDENT_BLANKLINE)
894896
pmb->wsd = delta;
895897

896-
return !(delta == pmb->wsd && al - a_off == bl - b_off &&
897-
!memcmp(a + a_off, b + b_off, al - a_off));
898+
return delta != pmb->wsd;
898899
}
899900

900-
static int moved_entry_cmp(const void *hashmap_cmp_fn_data,
901-
const struct hashmap_entry *eptr,
902-
const struct hashmap_entry *entry_or_key,
903-
const void *keydata)
901+
struct interned_diff_symbol {
902+
struct hashmap_entry ent;
903+
struct emitted_diff_symbol *es;
904+
};
905+
906+
static int interned_diff_symbol_cmp(const void *hashmap_cmp_fn_data,
907+
const struct hashmap_entry *eptr,
908+
const struct hashmap_entry *entry_or_key,
909+
const void *keydata)
904910
{
905911
const struct diff_options *diffopt = hashmap_cmp_fn_data;
906912
const struct emitted_diff_symbol *a, *b;
907913
unsigned flags = diffopt->color_moved_ws_handling
908914
& XDF_WHITESPACE_FLAGS;
909915

910-
a = container_of(eptr, const struct moved_entry, ent)->es;
911-
b = container_of(entry_or_key, const struct moved_entry, ent)->es;
916+
a = container_of(eptr, const struct interned_diff_symbol, ent)->es;
917+
b = container_of(entry_or_key, const struct interned_diff_symbol, ent)->es;
912918

913919
return !xdiff_compare_lines(a->line + a->indent_off,
914920
a->len - a->indent_off,
915921
b->line + b->indent_off,
916922
b->len - b->indent_off, flags);
917923
}
918924

919-
static struct moved_entry *prepare_entry(struct diff_options *o,
920-
int line_no)
925+
static void prepare_entry(struct diff_options *o, struct emitted_diff_symbol *l,
926+
struct interned_diff_symbol *s)
921927
{
922-
struct moved_entry *ret = xmalloc(sizeof(*ret));
923-
struct emitted_diff_symbol *l = &o->emitted_symbols->buf[line_no];
924928
unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS;
925929
unsigned int hash = xdiff_hash_string(l->line + l->indent_off,
926930
l->len - l->indent_off, flags);
927931

928-
hashmap_entry_init(&ret->ent, hash);
929-
ret->es = l;
930-
ret->next_line = NULL;
931-
932-
return ret;
932+
hashmap_entry_init(&s->ent, hash);
933+
s->es = l;
933934
}
934935

935-
static void add_lines_to_move_detection(struct diff_options *o,
936-
struct hashmap *add_lines,
937-
struct hashmap *del_lines)
936+
struct moved_entry_list {
937+
struct moved_entry *add, *del;
938+
};
939+
940+
static struct moved_entry_list *add_lines_to_move_detection(struct diff_options *o,
941+
struct mem_pool *entry_mem_pool)
938942
{
939943
struct moved_entry *prev_line = NULL;
940-
944+
struct mem_pool interned_pool;
945+
struct hashmap interned_map;
946+
struct moved_entry_list *entry_list = NULL;
947+
size_t entry_list_alloc = 0;
948+
unsigned id = 0;
941949
int n;
950+
951+
hashmap_init(&interned_map, interned_diff_symbol_cmp, o, 8096);
952+
mem_pool_init(&interned_pool, 1024 * 1024);
953+
942954
for (n = 0; n < o->emitted_symbols->nr; n++) {
943-
struct hashmap *hm;
944-
struct moved_entry *key;
955+
struct interned_diff_symbol key;
956+
struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n];
957+
struct interned_diff_symbol *s;
958+
struct moved_entry *entry;
945959

946-
switch (o->emitted_symbols->buf[n].s) {
947-
case DIFF_SYMBOL_PLUS:
948-
hm = add_lines;
949-
break;
950-
case DIFF_SYMBOL_MINUS:
951-
hm = del_lines;
952-
break;
953-
default:
960+
if (l->s != DIFF_SYMBOL_PLUS && l->s != DIFF_SYMBOL_MINUS) {
954961
prev_line = NULL;
955962
continue;
956963
}
957964

958965
if (o->color_moved_ws_handling &
959966
COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
960-
fill_es_indent_data(&o->emitted_symbols->buf[n]);
961-
key = prepare_entry(o, n);
962-
if (prev_line && prev_line->es->s == o->emitted_symbols->buf[n].s)
963-
prev_line->next_line = key;
967+
fill_es_indent_data(l);
964968

965-
hashmap_add(hm, &key->ent);
966-
prev_line = key;
969+
prepare_entry(o, l, &key);
970+
s = hashmap_get_entry(&interned_map, &key, ent, &key.ent);
971+
if (s) {
972+
l->id = s->es->id;
973+
} else {
974+
l->id = id;
975+
ALLOC_GROW_BY(entry_list, id, 1, entry_list_alloc);
976+
hashmap_add(&interned_map,
977+
memcpy(mem_pool_alloc(&interned_pool,
978+
sizeof(key)),
979+
&key, sizeof(key)));
980+
}
981+
entry = mem_pool_alloc(entry_mem_pool, sizeof(*entry));
982+
entry->es = l;
983+
entry->next_line = NULL;
984+
if (prev_line && prev_line->es->s == l->s)
985+
prev_line->next_line = entry;
986+
prev_line = entry;
987+
if (l->s == DIFF_SYMBOL_PLUS) {
988+
entry->next_match = entry_list[l->id].add;
989+
entry_list[l->id].add = entry;
990+
} else {
991+
entry->next_match = entry_list[l->id].del;
992+
entry_list[l->id].del = entry;
993+
}
967994
}
995+
996+
hashmap_clear(&interned_map);
997+
mem_pool_discard(&interned_pool, 0);
998+
999+
return entry_list;
9681000
}
9691001

9701002
static void pmb_advance_or_null(struct diff_options *o,
@@ -973,7 +1005,6 @@ static void pmb_advance_or_null(struct diff_options *o,
9731005
int *pmb_nr)
9741006
{
9751007
int i, j;
976-
unsigned flags = o->color_moved_ws_handling & XDF_WHITESPACE_FLAGS;
9771008

9781009
for (i = 0, j = 0; i < *pmb_nr; i++) {
9791010
int match;
@@ -986,9 +1017,8 @@ static void pmb_advance_or_null(struct diff_options *o,
9861017
match = cur &&
9871018
!cmp_in_block_with_wsd(cur, l, &pmb[i]);
9881019
else
989-
match = cur &&
990-
xdiff_compare_lines(cur->es->line, cur->es->len,
991-
l->line, l->len, flags);
1020+
match = cur && cur->es->id == l->id;
1021+
9921022
if (match) {
9931023
pmb[j] = pmb[i];
9941024
pmb[j++].match = cur;
@@ -998,7 +1028,6 @@ static void pmb_advance_or_null(struct diff_options *o,
9981028
}
9991029

10001030
static void fill_potential_moved_blocks(struct diff_options *o,
1001-
struct hashmap *hm,
10021031
struct moved_entry *match,
10031032
struct emitted_diff_symbol *l,
10041033
struct moved_block **pmb_p,
@@ -1012,7 +1041,7 @@ static void fill_potential_moved_blocks(struct diff_options *o,
10121041
* The current line is the start of a new block.
10131042
* Setup the set of potential blocks.
10141043
*/
1015-
hashmap_for_each_entry_from(hm, match, ent) {
1044+
for (; match; match = match->next_match) {
10161045
ALLOC_GROW(pmb, pmb_nr + 1, pmb_alloc);
10171046
if (o->color_moved_ws_handling &
10181047
COLOR_MOVED_WS_ALLOW_INDENTATION_CHANGE)
@@ -1067,8 +1096,7 @@ static int adjust_last_block(struct diff_options *o, int n, int block_length)
10671096

10681097
/* Find blocks of moved code, delegate actual coloring decision to helper */
10691098
static void mark_color_as_moved(struct diff_options *o,
1070-
struct hashmap *add_lines,
1071-
struct hashmap *del_lines)
1099+
struct moved_entry_list *entry_list)
10721100
{
10731101
struct moved_block *pmb = NULL; /* potentially moved blocks */
10741102
int pmb_nr = 0, pmb_alloc = 0;
@@ -1077,23 +1105,15 @@ static void mark_color_as_moved(struct diff_options *o,
10771105

10781106

10791107
for (n = 0; n < o->emitted_symbols->nr; n++) {
1080-
struct hashmap *hm = NULL;
1081-
struct moved_entry *key;
10821108
struct moved_entry *match = NULL;
10831109
struct emitted_diff_symbol *l = &o->emitted_symbols->buf[n];
10841110

10851111
switch (l->s) {
10861112
case DIFF_SYMBOL_PLUS:
1087-
hm = del_lines;
1088-
key = prepare_entry(o, n);
1089-
match = hashmap_get_entry(hm, key, ent, NULL);
1090-
free(key);
1113+
match = entry_list[l->id].del;
10911114
break;
10921115
case DIFF_SYMBOL_MINUS:
1093-
hm = add_lines;
1094-
key = prepare_entry(o, n);
1095-
match = hashmap_get_entry(hm, key, ent, NULL);
1096-
free(key);
1116+
match = entry_list[l->id].add;
10971117
break;
10981118
default:
10991119
flipped_block = 0;
@@ -1135,7 +1155,7 @@ static void mark_color_as_moved(struct diff_options *o,
11351155
*/
11361156
n -= block_length;
11371157
else
1138-
fill_potential_moved_blocks(o, hm, match, l,
1158+
fill_potential_moved_blocks(o, match, l,
11391159
&pmb, &pmb_alloc,
11401160
&pmb_nr);
11411161

@@ -6253,20 +6273,18 @@ static void diff_flush_patch_all_file_pairs(struct diff_options *o)
62536273

62546274
if (o->emitted_symbols) {
62556275
if (o->color_moved) {
6256-
struct hashmap add_lines, del_lines;
6257-
6258-
hashmap_init(&del_lines, moved_entry_cmp, o, 0);
6259-
hashmap_init(&add_lines, moved_entry_cmp, o, 0);
6276+
struct mem_pool entry_pool;
6277+
struct moved_entry_list *entry_list;
62606278

6261-
add_lines_to_move_detection(o, &add_lines, &del_lines);
6262-
mark_color_as_moved(o, &add_lines, &del_lines);
6279+
mem_pool_init(&entry_pool, 1024 * 1024);
6280+
entry_list = add_lines_to_move_detection(o,
6281+
&entry_pool);
6282+
mark_color_as_moved(o, entry_list);
62636283
if (o->color_moved == COLOR_MOVED_ZEBRA_DIM)
62646284
dim_moved_lines(o);
62656285

6266-
hashmap_clear_and_free(&add_lines, struct moved_entry,
6267-
ent);
6268-
hashmap_clear_and_free(&del_lines, struct moved_entry,
6269-
ent);
6286+
mem_pool_discard(&entry_pool, 0);
6287+
free(entry_list);
62706288
}
62716289

62726290
for (i = 0; i < esm.nr; i++)

0 commit comments

Comments
 (0)