@@ -34,13 +34,16 @@ typedef struct s_xdlclass {
34
34
char const * line ;
35
35
long size ;
36
36
long idx ;
37
+ long len1 , len2 ;
37
38
} xdlclass_t ;
38
39
39
40
/*
 * Shared state used to assign class indexes to records; the same classifier
 * instance is handed to the preparation of both files.
 *
 * hbits  - bit count handed to XDL_HASHLONG() when picking an rchash slot.
 * hsize  - number of slots in rchash (xdl_init_classifier() clears that many).
 * rchash - bucket heads; classes in a bucket are chained through ->next.
 * ncha   - released with xdl_cha_free(); presumably the store the xdlclass_t
 *          entries are allocated from (allocation site not visible here).
 * rcrecs - class pointers indexed by class idx, so a class can be looked up
 *          directly from a record's rewritten ha value.
 * alloc  - capacity of rcrecs in entries; doubled when count exceeds it.
 * count  - number of classes created so far; also the idx given to the next one.
 * flags  - NOTE(review): presumably the flags passed to xdl_init_classifier();
 *          the assignment is outside the visible hunks - confirm.
 */
typedef struct s_xdlclassifier {
40
41
unsigned int hbits ;
41
42
long hsize ;
42
43
xdlclass_t * * rchash ;
43
44
chastore_t ncha ;
45
+ xdlclass_t * * rcrecs ;
46
+ long alloc ;
44
47
long count ;
45
48
long flags ;
46
49
} xdlclassifier_t ;
@@ -50,15 +53,15 @@ typedef struct s_xdlclassifier {
50
53
51
54
static int xdl_init_classifier (xdlclassifier_t * cf , long size , long flags );
52
55
static void xdl_free_classifier (xdlclassifier_t * cf );
53
- static int xdl_classify_record (xdlclassifier_t * cf , xrecord_t * * rhash , unsigned int hbits ,
54
- xrecord_t * rec );
55
- static int xdl_prepare_ctx (mmfile_t * mf , long narec , xpparam_t const * xpp ,
56
+ static int xdl_classify_record (unsigned int pass , xdlclassifier_t * cf , xrecord_t * * rhash ,
57
+ unsigned int hbits , xrecord_t * rec );
58
+ static int xdl_prepare_ctx (unsigned int pass , mmfile_t * mf , long narec , xpparam_t const * xpp ,
56
59
xdlclassifier_t * cf , xdfile_t * xdf );
57
60
static void xdl_free_ctx (xdfile_t * xdf );
58
61
static int xdl_clean_mmatch (char const * dis , long i , long s , long e );
59
- static int xdl_cleanup_records (xdfile_t * xdf1 , xdfile_t * xdf2 );
62
+ static int xdl_cleanup_records (xdlclassifier_t * cf , xdfile_t * xdf1 , xdfile_t * xdf2 );
60
63
static int xdl_trim_ends (xdfile_t * xdf1 , xdfile_t * xdf2 );
61
- static int xdl_optimize_ctxs (xdfile_t * xdf1 , xdfile_t * xdf2 );
64
+ static int xdl_optimize_ctxs (xdlclassifier_t * cf , xdfile_t * xdf1 , xdfile_t * xdf2 );
62
65
63
66
64
67
@@ -83,6 +86,14 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) {
83
86
for (i = 0 ; i < cf -> hsize ; i ++ )
84
87
cf -> rchash [i ] = NULL ;
85
88
89
+ cf -> alloc = size ;
90
+ if (!(cf -> rcrecs = (xdlclass_t * * ) xdl_malloc (cf -> alloc * sizeof (xdlclass_t * )))) {
91
+
92
+ xdl_free (cf -> rchash );
93
+ xdl_cha_free (& cf -> ncha );
94
+ return -1 ;
95
+ }
96
+
86
97
cf -> count = 0 ;
87
98
88
99
return 0 ;
@@ -91,16 +102,18 @@ static int xdl_init_classifier(xdlclassifier_t *cf, long size, long flags) {
91
102
92
103
/*
 * Release everything the classifier owns: the rcrecs pointer array, the
 * rchash bucket array, and the ncha store (via xdl_cha_free()). The
 * xdlclassifier_t object itself is not freed here.
 */
static void xdl_free_classifier (xdlclassifier_t * cf ) {
93
104
105
+ xdl_free (cf -> rcrecs );
94
106
xdl_free (cf -> rchash );
95
107
xdl_cha_free (& cf -> ncha );
96
108
}
97
109
98
110
99
- static int xdl_classify_record (xdlclassifier_t * cf , xrecord_t * * rhash , unsigned int hbits ,
100
- xrecord_t * rec ) {
111
+ static int xdl_classify_record (unsigned int pass , xdlclassifier_t * cf , xrecord_t * * rhash ,
112
+ unsigned int hbits , xrecord_t * rec ) {
101
113
long hi ;
102
114
char const * line ;
103
115
xdlclass_t * rcrec ;
116
+ xdlclass_t * * rcrecs ;
104
117
105
118
line = rec -> ptr ;
106
119
hi = (long ) XDL_HASHLONG (rec -> ha , cf -> hbits );
@@ -116,13 +129,25 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned
116
129
return -1 ;
117
130
}
118
131
rcrec -> idx = cf -> count ++ ;
132
+ if (cf -> count > cf -> alloc ) {
133
+ cf -> alloc *= 2 ;
134
+ if (!(rcrecs = (xdlclass_t * * ) xdl_realloc (cf -> rcrecs , cf -> alloc * sizeof (xdlclass_t * )))) {
135
+
136
+ return -1 ;
137
+ }
138
+ cf -> rcrecs = rcrecs ;
139
+ }
140
+ cf -> rcrecs [rcrec -> idx ] = rcrec ;
119
141
rcrec -> line = line ;
120
142
rcrec -> size = rec -> size ;
121
143
rcrec -> ha = rec -> ha ;
144
+ rcrec -> len1 = rcrec -> len2 = 0 ;
122
145
rcrec -> next = cf -> rchash [hi ];
123
146
cf -> rchash [hi ] = rcrec ;
124
147
}
125
148
149
+ (pass == 1 ) ? rcrec -> len1 ++ : rcrec -> len2 ++ ;
150
+
126
151
rec -> ha = (unsigned long ) rcrec -> idx ;
127
152
128
153
hi = (long ) XDL_HASHLONG (rec -> ha , hbits );
@@ -133,7 +158,7 @@ static int xdl_classify_record(xdlclassifier_t *cf, xrecord_t **rhash, unsigned
133
158
}
134
159
135
160
136
- static int xdl_prepare_ctx (mmfile_t * mf , long narec , xpparam_t const * xpp ,
161
+ static int xdl_prepare_ctx (unsigned int pass , mmfile_t * mf , long narec , xpparam_t const * xpp ,
137
162
xdlclassifier_t * cf , xdfile_t * xdf ) {
138
163
unsigned int hbits ;
139
164
long i , nrec , hsize , bsize ;
@@ -200,7 +225,7 @@ static int xdl_prepare_ctx(mmfile_t *mf, long narec, xpparam_t const *xpp,
200
225
crec -> ha = hav ;
201
226
recs [nrec ++ ] = crec ;
202
227
203
- if (xdl_classify_record (cf , rhash , hbits , crec ) < 0 ) {
228
+ if (xdl_classify_record (pass , cf , rhash , hbits , crec ) < 0 ) {
204
229
205
230
xdl_free (rhash );
206
231
xdl_free (recs );
@@ -276,28 +301,28 @@ int xdl_prepare_env(mmfile_t *mf1, mmfile_t *mf2, xpparam_t const *xpp,
276
301
return -1 ;
277
302
}
278
303
279
- if (xdl_prepare_ctx (mf1 , enl1 , xpp , & cf , & xe -> xdf1 ) < 0 ) {
304
+ if (xdl_prepare_ctx (1 , mf1 , enl1 , xpp , & cf , & xe -> xdf1 ) < 0 ) {
280
305
281
306
xdl_free_classifier (& cf );
282
307
return -1 ;
283
308
}
284
- if (xdl_prepare_ctx (mf2 , enl2 , xpp , & cf , & xe -> xdf2 ) < 0 ) {
309
+ if (xdl_prepare_ctx (2 , mf2 , enl2 , xpp , & cf , & xe -> xdf2 ) < 0 ) {
285
310
286
311
xdl_free_ctx (& xe -> xdf1 );
287
312
xdl_free_classifier (& cf );
288
313
return -1 ;
289
314
}
290
315
291
- xdl_free_classifier (& cf );
292
-
293
316
if (!(xpp -> flags & XDF_PATIENCE_DIFF ) &&
294
- xdl_optimize_ctxs (& xe -> xdf1 , & xe -> xdf2 ) < 0 ) {
317
+ xdl_optimize_ctxs (& cf , & xe -> xdf1 , & xe -> xdf2 ) < 0 ) {
295
318
296
319
xdl_free_ctx (& xe -> xdf2 );
297
320
xdl_free_ctx (& xe -> xdf1 );
298
321
return -1 ;
299
322
}
300
323
324
+ xdl_free_classifier (& cf );
325
+
301
326
return 0 ;
302
327
}
303
328
@@ -372,11 +397,10 @@ static int xdl_clean_mmatch(char const *dis, long i, long s, long e) {
372
397
* matches on the other file. Also, lines that have multiple matches
373
398
* might be potentially discarded if they appear in a run of discardable.
374
399
*/
375
- static int xdl_cleanup_records (xdfile_t * xdf1 , xdfile_t * xdf2 ) {
376
- long i , nm , rhi , nreff , mlim ;
377
- unsigned long hav ;
400
+ static int xdl_cleanup_records (xdlclassifier_t * cf , xdfile_t * xdf1 , xdfile_t * xdf2 ) {
401
+ long i , nm , nreff ;
378
402
xrecord_t * * recs ;
379
- xrecord_t * rec ;
403
+ xdlclass_t * rcrec ;
380
404
char * dis , * dis1 , * dis2 ;
381
405
382
406
if (!(dis = (char * ) xdl_malloc (xdf1 -> nrec + xdf2 -> nrec + 2 ))) {
@@ -387,26 +411,16 @@ static int xdl_cleanup_records(xdfile_t *xdf1, xdfile_t *xdf2) {
387
411
dis1 = dis ;
388
412
dis2 = dis1 + xdf1 -> nrec + 1 ;
389
413
390
- if ((mlim = xdl_bogosqrt (xdf1 -> nrec )) > XDL_MAX_EQLIMIT )
391
- mlim = XDL_MAX_EQLIMIT ;
392
414
for (i = xdf1 -> dstart , recs = & xdf1 -> recs [xdf1 -> dstart ]; i <= xdf1 -> dend ; i ++ , recs ++ ) {
393
- hav = (* recs )-> ha ;
394
- rhi = (long ) XDL_HASHLONG (hav , xdf2 -> hbits );
395
- for (nm = 0 , rec = xdf2 -> rhash [rhi ]; rec ; rec = rec -> next )
396
- if (rec -> ha == hav && ++ nm == mlim )
397
- break ;
398
- dis1 [i ] = (nm == 0 ) ? 0 : (nm >= mlim ) ? 2 : 1 ;
415
+ rcrec = cf -> rcrecs [(* recs )-> ha ];
416
+ nm = rcrec ? rcrec -> len2 : 0 ;
417
+ dis1 [i ] = (nm == 0 ) ? 0 : 1 ;
399
418
}
400
419
401
- if ((mlim = xdl_bogosqrt (xdf2 -> nrec )) > XDL_MAX_EQLIMIT )
402
- mlim = XDL_MAX_EQLIMIT ;
403
420
for (i = xdf2 -> dstart , recs = & xdf2 -> recs [xdf2 -> dstart ]; i <= xdf2 -> dend ; i ++ , recs ++ ) {
404
- hav = (* recs )-> ha ;
405
- rhi = (long ) XDL_HASHLONG (hav , xdf1 -> hbits );
406
- for (nm = 0 , rec = xdf1 -> rhash [rhi ]; rec ; rec = rec -> next )
407
- if (rec -> ha == hav && ++ nm == mlim )
408
- break ;
409
- dis2 [i ] = (nm == 0 ) ? 0 : (nm >= mlim ) ? 2 : 1 ;
421
+ rcrec = cf -> rcrecs [(* recs )-> ha ];
422
+ nm = rcrec ? rcrec -> len1 : 0 ;
423
+ dis2 [i ] = (nm == 0 ) ? 0 : 1 ;
410
424
}
411
425
412
426
for (nreff = 0 , i = xdf1 -> dstart , recs = & xdf1 -> recs [xdf1 -> dstart ];
@@ -468,10 +482,10 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
468
482
}
469
483
470
484
471
- static int xdl_optimize_ctxs (xdfile_t * xdf1 , xdfile_t * xdf2 ) {
485
+ static int xdl_optimize_ctxs (xdlclassifier_t * cf , xdfile_t * xdf1 , xdfile_t * xdf2 ) {
472
486
473
487
if (xdl_trim_ends (xdf1 , xdf2 ) < 0 ||
474
- xdl_cleanup_records (xdf1 , xdf2 ) < 0 ) {
488
+ xdl_cleanup_records (cf , xdf1 , xdf2 ) < 0 ) {
475
489
476
490
return -1 ;
477
491
}
0 commit comments