Skip to content

Commit 663c5ad

Browse files
phillipwood authored and gitster committed
diff histogram: intern strings
Histogram is the only diff algorithm not to call xdl_classify_record().
xdl_classify_record() ensures that the hash values of two strings that
are not equal differ, which means that it is not necessary to use
xdl_recmatch() when comparing lines; all that is necessary is to compare
the hash values. This gives a 7% reduction in the runtime of
"git log --patch" when using the histogram diff algorithm.

Test                                  HEAD^             HEAD
-----------------------------------------------------------------------------
4000.1: log -3000 (baseline)          0.18(0.14+0.04)   0.19(0.17+0.02) +5.6%
4000.2: log --raw -3000 (tree-only)   0.99(0.77+0.21)   0.98(0.78+0.20) -1.0%
4000.3: log -p -3000 (Myers)          4.84(4.31+0.51)   4.81(4.15+0.64) -0.6%
4000.4: log -p -3000 --histogram      6.34(5.86+0.46)   5.87(5.19+0.66) -7.4%
4000.5: log -p -3000 --patience       5.39(4.60+0.76)   5.35(4.60+0.73) -0.7%

Signed-off-by: Phillip Wood <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent cd3e606 commit 663c5ad

File tree

2 files changed

+10
-19
lines changed

2 files changed

+10
-19
lines changed

xdiff/xhistogram.c

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -91,9 +91,8 @@ struct region {
9191
static int cmp_recs(xpparam_t const *xpp,
9292
xrecord_t *r1, xrecord_t *r2)
9393
{
94-
return r1->ha == r2->ha &&
95-
xdl_recmatch(r1->ptr, r1->size, r2->ptr, r2->size,
96-
xpp->flags);
94+
return r1->ha == r2->ha;
95+
9796
}
9897

9998
#define CMP_ENV(xpp, env, s1, l1, s2, l2) \

xdiff/xprepare.c

Lines changed: 8 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -181,15 +181,11 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
181181
if (!(recs = (xrecord_t **) xdl_malloc(narec * sizeof(xrecord_t *))))
182182
goto abort;
183183

184-
if (XDF_DIFF_ALG(xpp->flags) == XDF_HISTOGRAM_DIFF)
185-
hbits = hsize = 0;
186-
else {
187-
hbits = xdl_hashbits((unsigned int) narec);
188-
hsize = 1 << hbits;
189-
if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *))))
190-
goto abort;
191-
memset(rhash, 0, hsize * sizeof(xrecord_t *));
192-
}
184+
hbits = xdl_hashbits((unsigned int) narec);
185+
hsize = 1 << hbits;
186+
if (!(rhash = (xrecord_t **) xdl_malloc(hsize * sizeof(xrecord_t *))))
187+
goto abort;
188+
memset(rhash, 0, hsize * sizeof(xrecord_t *));
193189

194190
nrec = 0;
195191
if ((cur = blk = xdl_mmfile_first(mf, &bsize)) != NULL) {
@@ -208,9 +204,7 @@ static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_
208204
crec->size = (long) (cur - prev);
209205
crec->ha = hav;
210206
recs[nrec++] = crec;
211-
212-
if ((XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF) &&
213-
xdl_classify_record(pass, cf, rhash, hbits, crec) < 0)
207+
if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 0)
214208
goto abort;
215209
}
216210
}
@@ -279,8 +273,7 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
279273
enl1 = xdl_guess_lines(mf1, sample) + 1;
280274
enl2 = xdl_guess_lines(mf2, sample) + 1;
281275

282-
if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF &&
283-
xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
276+
if (xdl_init_classifier(&cf, enl1 + enl2 + 1, xpp->flags) < 0)
284277
return -1;
285278

286279
if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
@@ -305,8 +298,7 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
305298
return -1;
306299
}
307300

308-
if (XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)
309-
xdl_free_classifier(&cf);
301+
xdl_free_classifier(&cf);
310302

311303
return 0;
312304
}

0 commit comments

Comments
 (0)