Skip to content

Commit 2db30cc

Browse files
committed
xdiff: separate parsing lines from hashing them
We want to use xxhash for faster hashing. To facilitate that and to simplify the code, separate the concerns of parsing and hashing into discrete steps. This makes swapping the hash function much easier. Since xdl_hash_record() both parses and hashes lines, this requires some slight code restructuring. Signed-off-by: Ezekiel Newren <[email protected]>
1 parent 6df9f50 commit 2db30cc

File tree

1 file changed

+44
-31
lines changed

1 file changed

+44
-31
lines changed

xdiff/xprepare.c

Lines changed: 44 additions & 31 deletions
Original file line numberDiff line numberDiff line change
@@ -129,72 +129,85 @@ static int xdl_classify_record(unsigned int pass, xdlclassifier_t *cf, xrecord_t
129129
}
130130

131131

132+
/*
 * Split the buffer described by `mf` into line records and store them in
 * `xdf->recs` / `xdf->nrec`.
 *
 * A record ends at (and includes) each '\n'; a final run of bytes with no
 * trailing newline becomes the last record. Records point directly into
 * `mf->ptr` (nothing is copied) and are created with `ha` set to 0, so the
 * caller is expected to hash them afterwards.
 *
 * `narec` is the initial capacity of the record array; the array is grown
 * as needed. `xdf->rcha` must already be initialised, since record structs
 * are allocated from it.
 *
 * The function returns void, so every allocation failure is fatal and is
 * reported with die().
 */
static void xdl_parse_lines(mmfile_t *mf, long narec, xdfile_t *xdf) {
	u8 const *ptr = (u8 const *) mf->ptr;
	usize len = (usize) mf->size;

	xdf->recs = NULL;
	xdf->nrec = 0;
	/* A failed initial allocation would otherwise be written through below. */
	if (!XDL_ALLOC_ARRAY(xdf->recs, narec))
		die("XDL_ALLOC_ARRAY failed");

	while (len > 0) {
		xrecord_t *rec;
		u8 const *eol = memchr(ptr, '\n', len);
		/* Include the newline in the record; the last line may lack one. */
		usize length = eol ? (usize) (eol - ptr) + 1 : len;

		if (XDL_ALLOC_GROW(xdf->recs, xdf->nrec + 1, narec))
			die("XDL_ALLOC_GROW failed");

		rec = xdl_cha_alloc(&xdf->rcha);
		if (!rec)
			die("xdl_cha_alloc failed");

		rec->ptr = ptr;
		rec->size = length;
		rec->ha = 0;
		xdf->recs[xdf->nrec++] = rec;

		ptr += length;
		len -= length;
	}
}
161+
162+
132163
static int xdl_prepare_ctx(unsigned int pass, mmfile_t *mf, long narec, xpparam_t const *xpp,
133164
xdlclassifier_t *cf, xdfile_t *xdf) {
134-
long nrec, bsize;
135-
unsigned long hav;
136-
char const *blk, *cur, *top, *prev;
137-
xrecord_t *crec;
138-
xrecord_t **recs;
139165
unsigned long *ha;
140166
char *rchg;
141167
long *rindex;
142168

143169
ha = NULL;
144170
rindex = NULL;
145171
rchg = NULL;
146-
recs = NULL;
147172

148173
if (xdl_cha_init(&xdf->rcha, sizeof(xrecord_t), narec / 4 + 1) < 0)
149174
goto abort;
150-
if (!XDL_ALLOC_ARRAY(recs, narec))
151-
goto abort;
152175

153-
nrec = 0;
154-
if ((cur = blk = xdl_mmfile_first(mf, &bsize))) {
155-
for (top = blk + bsize; cur < top; ) {
156-
prev = cur;
157-
hav = xdl_hash_record(&cur, top, xpp->flags);
158-
if (XDL_ALLOC_GROW(recs, nrec + 1, narec))
159-
goto abort;
160-
if (!(crec = xdl_cha_alloc(&xdf->rcha)))
161-
goto abort;
162-
crec->ptr = (u8 const*) prev;
163-
crec->size = (long) (cur - prev);
164-
crec->ha = hav;
165-
recs[nrec++] = crec;
166-
if (xdl_classify_record(pass, cf, crec) < 0)
167-
goto abort;
168-
}
176+
xdl_parse_lines(mf, narec, xdf);
177+
178+
for (usize i = 0; i < (usize) xdf->nrec; i++) {
179+
xrecord_t *rec = xdf->recs[i];
180+
char const* dump = (char const*) rec->ptr;
181+
rec->ha = xdl_hash_record(&dump, (char const*) (rec->ptr + rec->size), xpp->flags);
182+
xdl_classify_record(pass, cf, rec);
169183
}
170184

171-
if (!XDL_CALLOC_ARRAY(rchg, nrec + 2))
185+
186+
if (!XDL_CALLOC_ARRAY(rchg, xdf->nrec + 2))
172187
goto abort;
173188

174189
if ((XDF_DIFF_ALG(xpp->flags) != XDF_PATIENCE_DIFF) &&
175190
(XDF_DIFF_ALG(xpp->flags) != XDF_HISTOGRAM_DIFF)) {
176-
if (!XDL_ALLOC_ARRAY(rindex, nrec + 1))
191+
if (!XDL_ALLOC_ARRAY(rindex, xdf->nrec + 1))
177192
goto abort;
178-
if (!XDL_ALLOC_ARRAY(ha, nrec + 1))
193+
if (!XDL_ALLOC_ARRAY(ha, xdf->nrec + 1))
179194
goto abort;
180195
}
181196

182-
xdf->nrec = nrec;
183-
xdf->recs = recs;
184197
xdf->rchg = rchg + 1;
185198
xdf->rindex = rindex;
186199
xdf->nreff = 0;
187200
xdf->ha = ha;
188201
xdf->dstart = 0;
189-
xdf->dend = nrec - 1;
202+
xdf->dend = xdf->nrec - 1;
190203

191204
return 0;
192205

193206
abort:
194207
xdl_free(ha);
195208
xdl_free(rindex);
196209
xdl_free(rchg);
197-
xdl_free(recs);
210+
xdl_free(xdf->recs);
198211
xdl_cha_free(&xdf->rcha);
199212
return -1;
200213
}

0 commit comments

Comments
 (0)