Skip to content

Commit 27af01d

Browse files
rctay authored and gitster committed
xdiff/xprepare: improve O(n*m) performance in xdl_cleanup_records()
In xdl_cleanup_records(), we see O(n*m) performance, where n is the number of records from xdf->dstart to xdf->dend, and m is the size of a bucket in xdf->rhash (bounded above by mlim). Here, we improve this to O(n) by pre-computing nm, the number of matching records (stored in rcrec->len1 and rcrec->len2), in xdl_classify_record(). Reported-by: Marat Radchenko <[email protected]> Signed-off-by: Tay Ray Chuan <[email protected]> Signed-off-by: Junio C Hamano <[email protected]>
1 parent 4bfe7cb commit 27af01d

File tree

1 file changed

+50
-36
lines changed

1 file changed

+50
-36
lines changed

xdiff/xprepare.c

Lines changed: 50 additions & 36 deletions
Original file line numberDiff line numberDiff line change
@@ -34,13 +34,16 @@ typedef struct s_xdlclass {
3434
char const *line;
3535
long size;
3636
long idx;
37+
long len1, len2;
3738
} xdlclass_t;
3839

3940
typedef struct s_xdlclassifier {
4041
unsigned int hbits;
4142
long hsize;
4243
xdlclass_t **rchash;
4344
chastore_t ncha;
45+
xdlclass_t **rcrecs;
46+
long alloc;
4447
long count;
4548
long flags;
4649
} xdlclassifier_t;
@@ -50,15 +53,15 @@ typedef struct s_xdlclassifier {
5053

5154
static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags);
5255
static void xdl_free_classifier(xdlclassifier_t *cf);
53-
static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits,
54-
xrecord_t *rec);
55-
static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
56+
static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash,
57+
unsigned int hbits, xrecord_t *rec);
58+
static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
5659
xdlclassifier_t *cf, xdfile_t *xdf);
5760
static void xdl_free_ctx(xdfile_t *xdf);
5861
static int xdl_clean_mmatch(char const *dis, long i, long s, long e);
59-
static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2);
62+
static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2);
6063
static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2);
61-
static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2);
64+
static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2);
6265

6366

6467

@@ -83,6 +86,14 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) {
8386
for (i = 0; i < cf->hsize; i++)
8487
cf->rchash[i] = NULL;
8588

89+
cf->alloc = size;
90+
if (!(cf->rcrecs = (xdlclass_t **) xdl_malloc(cf->alloc * sizeof(xdlclass_t *)))) {
91+
92+
xdl_free(cf->rchash);
93+
xdl_cha_free(&cf->ncha);
94+
return -1;
95+
}
96+
8697
cf->count = 0;
8798

8899
return 0;
@@ -91,16 +102,18 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) {
91102

92103
static void xdl_free_classifier(xdlclassifier_t *cf) {
93104

105+
xdl_free(cf->rcrecs);
94106
xdl_free(cf->rchash);
95107
xdl_cha_free(&cf->ncha);
96108
}
97109

98110

99-
static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned int hbits,
100-
xrecord_t *rec) {
111+
static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t **rhash,
112+
unsigned int hbits, xrecord_t *rec) {
101113
long hi;
102114
char const *line;
103115
xdlclass_t *rcrec;
116+
xdlclass_t **rcrecs;
104117

105118
line = rec->ptr;
106119
hi = (long) XDL_HASHLONG(rec->ha, cf->hbits);
@@ -116,13 +129,25 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned
116129
return -1;
117130
}
118131
rcrec->idx = cf->count++;
132+
if (cf->count > cf->alloc) {
133+
cf->alloc *= 2;
134+
if (!(rcrecs = (xdlclass_t **) xdl_realloc(cf->rcrecs, cf->alloc * sizeof(xdlclass_t *)))) {
135+
136+
return -1;
137+
}
138+
cf->rcrecs = rcrecs;
139+
}
140+
cf->rcrecs[rcrec->idx] = rcrec;
119141
rcrec->line = line;
120142
rcrec->size = rec->size;
121143
rcrec->ha = rec->ha;
144+
rcrec->len1 = rcrec->len2 = 0;
122145
rcrec->next = cf->rchash[hi];
123146
cf->rchash[hi] = rcrec;
124147
}
125148

149+
(pass == 1) ? rcrec->len1++ : rcrec->len2++;
150+
126151
rec->ha = (unsigned long) rcrec->idx;
127152

128153
hi = (long) XDL_HASHLONG(rec->ha, hbits);
@@ -133,7 +158,7 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned
133158
}
134159

135160

136-
static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
161+
static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
137162
xdlclassifier_t *cf, xdfile_t *xdf) {
138163
unsigned int hbits;
139164
long i, nrec, hsize, bsize;
@@ -200,7 +225,7 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
200225
crec->ha = hav;
201226
recs[nrec++] = crec;
202227

203-
if (xdl_classify_record(cf, rhash, hbits, crec) < 0) {
228+
if (xdl_classify_record(pass, cf, rhash, hbits, crec) < 0) {
204229

205230
xdl_free(rhash);
206231
xdl_free(recs);
@@ -276,28 +301,28 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
276301
return -1;
277302
}
278303

279-
if (xdl_prepare_ctx(mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
304+
if (xdl_prepare_ctx(1, mf1, enl1, xpp, &cf, &xe->xdf1) < 0) {
280305

281306
xdl_free_classifier(&cf);
282307
return -1;
283308
}
284-
if (xdl_prepare_ctx(mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
309+
if (xdl_prepare_ctx(2, mf2, enl2, xpp, &cf, &xe->xdf2) < 0) {
285310

286311
xdl_free_ctx(&xe->xdf1);
287312
xdl_free_classifier(&cf);
288313
return -1;
289314
}
290315

291-
xdl_free_classifier(&cf);
292-
293316
if (!(xpp->flags & XDF_PATIENCE_DIFF) &&
294-
xdl_optimize_ctxs(&xe->xdf1, &xe->xdf2) < 0) {
317+
xdl_optimize_ctxs(&cf, &xe->xdf1, &xe->xdf2) < 0) {
295318

296319
xdl_free_ctx(&xe->xdf2);
297320
xdl_free_ctx(&xe->xdf1);
298321
return -1;
299322
}
300323

324+
xdl_free_classifier(&cf);
325+
301326
return 0;
302327
}
303328

@@ -372,11 +397,10 @@ static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
372397
* matches on the other file. Also, lines that have multiple matches
373398
* might be potentially discarded if they appear in a run of discardable lines.
374399
*/
375-
static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2) {
376-
long i, nm, rhi, nreff, mlim;
377-
unsigned long hav;
400+
static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
401+
long i, nm, nreff;
378402
xrecord_t **recs;
379-
xrecord_t *rec;
403+
xdlclass_t *rcrec;
380404
char *dis, *dis1, *dis2;
381405

382406
if (!(dis = (char *) xdl_malloc(xdf1->nrec + xdf2->nrec + 2))) {
@@ -387,26 +411,16 @@ static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2) {
387411
dis1 = dis;
388412
dis2 = dis1 + xdf1->nrec + 1;
389413

390-
if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT)
391-
mlim = XDL_MAX_EQLIMIT;
392414
for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) {
393-
hav = (*recs)->ha;
394-
rhi = (long) XDL_HASHLONG(hav, xdf2->hbits);
395-
for (nm = 0, rec = xdf2->rhash[rhi]; rec; rec = rec->next)
396-
if (rec->ha == hav && ++nm == mlim)
397-
break;
398-
dis1[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1;
415+
rcrec = cf->rcrecs[(*recs)->ha];
416+
nm = rcrec ? rcrec->len2 : 0;
417+
dis1[i] = (nm == 0) ? 0: 1;
399418
}
400419

401-
if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT)
402-
mlim = XDL_MAX_EQLIMIT;
403420
for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) {
404-
hav = (*recs)->ha;
405-
rhi = (long) XDL_HASHLONG(hav, xdf1->hbits);
406-
for (nm = 0, rec = xdf1->rhash[rhi]; rec; rec = rec->next)
407-
if (rec->ha == hav && ++nm == mlim)
408-
break;
409-
dis2[i] = (nm == 0) ? 0: (nm >= mlim) ? 2: 1;
421+
rcrec = cf->rcrecs[(*recs)->ha];
422+
nm = rcrec ? rcrec->len1 : 0;
423+
dis2[i] = (nm == 0) ? 0: 1;
410424
}
411425

412426
for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];
@@ -468,10 +482,10 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
468482
}
469483

470484

471-
static int xdl_optimize_ctxs(xdfile_t *xdf1, xdfile_t *xdf2) {
485+
static int xdl_optimize_ctxs(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
472486

473487
if (xdl_trim_ends(xdf1, xdf2) < 0 ||
474-
xdl_cleanup_records(xdf1, xdf2) < 0) {
488+
xdl_cleanup_records(cf, xdf1, xdf2) < 0) {
475489

476490
return -1;
477491
}

0 commit comments

Comments
 (0)