Skip to content

Commit e385e1b

Browse files
ezekielnewren authored and gitster committed
xdiff: add macros DISCARD(0), KEEP(1), INVESTIGATE(2) in xprepare.c
This commit is refactor-only; no behavior is changed. A future commit
will use bool literals for changed[i].

The functions xdl_clean_mmatch() and xdl_cleanup_records() will be
cleaned up more in a future patch series. The changes to
xdl_cleanup_records(), in this patch, are just to make it clear why
`char rchg` is refactored to `bool changed`.

Rename dis* to action* and replace literal numericals with macros.

The old names came from when dis* (which I think was short for discard)
was treated like a boolean, but over time it grew into a ternary state
machine. The result was confusing because dis* and rchg* both used 0/1
values with different meanings.

The new names and macros make the states explicit. nm is short for
number of matches, and mlim is a heuristic limit:

    nm == 0        -> action[i] = DISCARD     -> changed[i] = true
    0 < nm < mlim  -> action[i] = KEEP        -> changed[i] = false
    nm >= mlim     -> action[i] = INVESTIGATE -> changed[i] = xdl_clean_mmatch()

When need_min is true, only DISCARD and KEEP occur because the limit
is effectively infinite.

Best-viewed-with: --color-words
Signed-off-by: Ezekiel Newren <[email protected]>
Signed-off-by: Junio C Hamano <[email protected]>
1 parent b7de64a commit e385e1b

File tree

1 file changed

+69
-37
lines changed

1 file changed

+69
-37
lines changed

xdiff/xprepare.c

Lines changed: 69 additions & 37 deletions
Original file line numberDiff line numberDiff line change
@@ -29,6 +29,9 @@
2929
#define XDL_GUESS_NLINES1 256
3030
#define XDL_GUESS_NLINES2 20
3131

32+
#define DISCARD 0
33+
#define KEEP 1
34+
#define INVESTIGATE 2
3235

3336
typedef struct s_xdlclass {
3437
struct s_xdlclass *next;
@@ -190,15 +193,15 @@ void xdl_free_env(xdfenv_t *xe) {
190193
}
191194

192195

193-
static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
196+
static bool xdl_clean_mmatch(uint8_t const *action, long i, long s, long e) {
194197
long r, rdis0, rpdis0, rdis1, rpdis1;
195198

196199
/*
197-
* Limits the window the is examined during the similar-lines
198-
* scan. The loops below stops when dis[i - r] == 1 (line that
199-
* has no match), but there are corner cases where the loop
200-
* proceed all the way to the extremities by causing huge
201-
* performance penalties in case of big files.
200+
* Limits the window that is examined during the similar-lines
201+
* scan. The loops below stop when action[i - r] == KEEP
202+
* (a matched, non-multimatch line), but there are corner cases where
203+
* the loops proceed all the way to the extremities, causing
204+
* huge performance penalties in case of big files.
202205
*/
203206
if (i - s > XDL_SIMSCAN_WINDOW)
204207
s = i - XDL_SIMSCAN_WINDOW;
@@ -207,40 +210,47 @@ static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
207210

208211
/*
209212
* Scans the lines before 'i' to find a run of lines that either
210-
* have no match (dis[j] == 0) or have multiple matches (dis[j] > 1).
211-
* Note that we always call this function with dis[i] > 1, so the
212-
* current line (i) is already a multimatch line.
213+
* have no match (action[j] == DISCARD) or have multiple matches
214+
* (action[j] == INVESTIGATE). Note that we always call this
215+
* function with action[i] == INVESTIGATE, so the current line
216+
* (i) is already a multimatch line.
213217
*/
214218
for (r = 1, rdis0 = 0, rpdis0 = 1; (i - r) >= s; r++) {
215-
if (!dis[i - r])
219+
if (action[i - r] == DISCARD)
216220
rdis0++;
217-
else if (dis[i - r] == 2)
221+
else if (action[i - r] == INVESTIGATE)
218222
rpdis0++;
219-
else
223+
else if (action[i - r] == KEEP)
220224
break;
225+
else
226+
BUG("Illegal value for action[i - r]");
221227
}
222228
/*
223-
* If the run before the line 'i' found only multimatch lines, we
224-
* return 0 and hence we don't make the current line (i) discarded.
225-
* We want to discard multimatch lines only when they appear in the
226-
* middle of runs with nomatch lines (dis[j] == 0).
229+
* If the run before the line 'i' found only multimatch lines,
230+
* we return false and hence we don't make the current line (i)
231+
* discarded. We want to discard multimatch lines only when
232+
* they appear in the middle of runs with nomatch lines
233+
* (action[j] == DISCARD).
227234
*/
228235
if (rdis0 == 0)
229236
return 0;
230237
for (r = 1, rdis1 = 0, rpdis1 = 1; (i + r) <= e; r++) {
231-
if (!dis[i + r])
238+
if (action[i + r] == DISCARD)
232239
rdis1++;
233-
else if (dis[i + r] == 2)
240+
else if (action[i + r] == INVESTIGATE)
234241
rpdis1++;
235-
else
242+
else if (action[i + r] == KEEP)
236243
break;
244+
else
245+
BUG("Illegal value for action[i + r]");
237246
}
238247
/*
239-
* If the run after the line 'i' found only multimatch lines, we
240-
* return 0 and hence we don't make the current line (i) discarded.
248+
* If the run after the line 'i' found only multimatch lines,
249+
* we return false and hence we don't make the current line (i)
250+
* discarded.
241251
*/
242252
if (rdis1 == 0)
243-
return 0;
253+
return false;
244254
rdis1 += rdis0;
245255
rpdis1 += rpdis0;
246256

@@ -251,59 +261,81 @@ static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
251261
/*
252262
* Try to reduce the problem complexity, discard records that have no
253263
* matches on the other file. Also, lines that have multiple matches
254-
* might be potentially discarded if they happear in a run of discardable.
264+
* might be potentially discarded if they appear in a run of discardable lines.
255265
*/
256266
static int xdl_cleanup_records(xdlclassifier_t *cf, xdfile_t *xdf1, xdfile_t *xdf2) {
257267
long i, nm, nreff, mlim;
258268
xrecord_t *recs;
259269
xdlclass_t *rcrec;
260-
char *dis, *dis1, *dis2;
261-
int need_min = !!(cf->flags & XDF_NEED_MINIMAL);
270+
uint8_t *action1 = NULL, *action2 = NULL;
271+
bool need_min = !!(cf->flags & XDF_NEED_MINIMAL);
272+
int ret = 0;
262273

263-
if (!XDL_CALLOC_ARRAY(dis, xdf1->nrec + xdf2->nrec + 2))
264-
return -1;
265-
dis1 = dis;
266-
dis2 = dis1 + xdf1->nrec + 1;
274+
/*
275+
* Create temporary arrays that will help us decide if
276+
* changed[i] should remain 0 or become 1.
277+
*/
278+
if (!XDL_CALLOC_ARRAY(action1, xdf1->nrec + 1)) {
279+
ret = -1;
280+
goto cleanup;
281+
}
282+
if (!XDL_CALLOC_ARRAY(action2, xdf2->nrec + 1)) {
283+
ret = -1;
284+
goto cleanup;
285+
}
267286

287+
/*
288+
* Initialize temporary arrays with DISCARD, KEEP, or INVESTIGATE.
289+
*/
268290
if ((mlim = xdl_bogosqrt(xdf1->nrec)) > XDL_MAX_EQLIMIT)
269291
mlim = XDL_MAX_EQLIMIT;
270292
for (i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart]; i <= xdf1->dend; i++, recs++) {
271293
rcrec = cf->rcrecs[recs->ha];
272294
nm = rcrec ? rcrec->len2 : 0;
273-
dis1[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 2: 1;
295+
action1[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP;
274296
}
275297

276298
if ((mlim = xdl_bogosqrt(xdf2->nrec)) > XDL_MAX_EQLIMIT)
277299
mlim = XDL_MAX_EQLIMIT;
278300
for (i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart]; i <= xdf2->dend; i++, recs++) {
279301
rcrec = cf->rcrecs[recs->ha];
280302
nm = rcrec ? rcrec->len1 : 0;
281-
dis2[i] = (nm == 0) ? 0: (nm >= mlim && !need_min) ? 2: 1;
303+
action2[i] = (nm == 0) ? DISCARD: (nm >= mlim && !need_min) ? INVESTIGATE: KEEP;
282304
}
283305

306+
/*
307+
* Use temporary arrays to decide if changed[i] should remain
308+
* 0 or become 1.
309+
*/
284310
for (nreff = 0, i = xdf1->dstart, recs = &xdf1->recs[xdf1->dstart];
285311
i <= xdf1->dend; i++, recs++) {
286-
if (dis1[i] == 1 ||
287-
(dis1[i] == 2 && !xdl_clean_mmatch(dis1, i, xdf1->dstart, xdf1->dend))) {
312+
if (action1[i] == KEEP ||
313+
(action1[i] == INVESTIGATE && !xdl_clean_mmatch(action1, i, xdf1->dstart, xdf1->dend))) {
288314
xdf1->rindex[nreff++] = i;
315+
/* changed[i] remains 0, i.e. keep */
289316
} else
290317
xdf1->changed[i] = 1;
318+
/* i.e. discard */
291319
}
292320
xdf1->nreff = nreff;
293321

294322
for (nreff = 0, i = xdf2->dstart, recs = &xdf2->recs[xdf2->dstart];
295323
i <= xdf2->dend; i++, recs++) {
296-
if (dis2[i] == 1 ||
297-
(dis2[i] == 2 && !xdl_clean_mmatch(dis2, i, xdf2->dstart, xdf2->dend))) {
324+
if (action2[i] == KEEP ||
325+
(action2[i] == INVESTIGATE && !xdl_clean_mmatch(action2, i, xdf2->dstart, xdf2->dend))) {
298326
xdf2->rindex[nreff++] = i;
327+
/* changed[i] remains 0, i.e. keep */
299328
} else
300329
xdf2->changed[i] = 1;
330+
/* i.e. discard */
301331
}
302332
xdf2->nreff = nreff;
303333

304-
xdl_free(dis);
334+
cleanup:
335+
xdl_free(action1);
336+
xdl_free(action2);
305337

306-
return 0;
338+
return ret;
307339
}
308340

309341

0 commit comments

Comments
 (0)