Skip to content

Commit ee80809

Browse files
committed
rewrite whitespace-only
1 parent 2e5ad07 commit ee80809

File tree

2 files changed

+166
-16
lines changed

2 files changed

+166
-16
lines changed

CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,4 +84,14 @@ add_test(NAME lws5
8484
-P ${CMAKE_CURRENT_SOURCE_DIR}/tests/runtest.cmake
8585
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/5)
8686

87+
# Test "lws6": runs tests/runtest.cmake in CMake script mode (-P) with
# tests/6 as the working directory.  The script receives the built
# executable path (CMD), two file names (SRC, PATCH) and two 64-hex-digit
# expected digests (EXPSHA, EXPSHA_WIN).
# NOTE(review): how runtest.cmake consumes these variables is not visible
# here; presumably EXPSHA_WIN is the digest expected on Windows -- confirm.
add_test(NAME lws6
88+
COMMAND ${CMAKE_COMMAND}
89+
-DCMD=$<TARGET_FILE:${PROJECT_NAME}>
90+
-DSRC=b-comms.c
91+
-DPATCH=gemini.patch
92+
-DEXPSHA=6ea83a67aba0358099752cfaf83a28d5d983b50855e93352ae9c04d656c7911e
93+
-DEXPSHA_WIN=2e6b9b12ae0128c9edfc109744b9c67848712b0521c322a45104895aa4cbc3b1
94+
-P ${CMAKE_CURRENT_SOURCE_DIR}/tests/runtest.cmake
95+
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/6)
96+
8797

fixdiff.c

Lines changed: 156 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,21 @@ typedef struct {
8484
int li;
8585
} lbuf_t;
8686

87+
/*
 * One pending replacement of a stanza line whose only difference from the
 * source file is whitespace.  Entries form a singly linked list hung off
 * the rewriter_head pointer of the diff-processing state; the whole list is
 * freed when a match attempt breaks and again after the stanza is dumped.
 * Each entry is a single malloc() covering the struct plus the replacement
 * bytes.
 */
typedef struct rewriter {
88+
/* next entry in the list; new entries are pushed at the head */
struct rewriter *next;
89+
/* number of bytes at text that get written to stdout (source line length + 1) */
size_t len;
90+
/* temp-file line counter value at which this replacement applies */
int line;
91+
/*
 * replacement bytes: the first character of the patch line followed by
 * the source file's version of the line; set to point just past this
 * struct inside the same allocation
 */
char *text;
92+
} rewriter_t;
93+
/* text points at storage over-allocated immediately after this struct */
94+
8795
typedef struct {
8896
off_t flo;
8997

9098
const char *reason;
9199

100+
rewriter_t *rewriter_head;
101+
92102
dss_t d;
93103
int pre;
94104
int post;
@@ -102,6 +112,8 @@ typedef struct {
102112

103113
int fd_temp;
104114

115+
int li_out;
116+
105117
char ongoing;
106118
char skip_this_one;
107119
char lead_in_active;
@@ -313,10 +325,26 @@ fixdiff_stanza_start(dp_t *pdp, char *sh, size_t len)
313325
return 0;
314326
}
315327

328+
/*
 * Copy a string for diagnostic output, making tabs visible.
 *
 * dest: destination buffer
 * in:   NUL-terminated source string
 * len:  size of dest in bytes
 *
 * At most len - 1 characters of in are copied; every tab is replaced with
 * '>' so whitespace differences can be seen in log lines, and dest is always
 * NUL-terminated when len is nonzero.
 *
 * A zero len leaves dest untouched: computing len - 1 on a size_t of 0 would
 * wrap to SIZE_MAX and turn the copy into an unbounded write.
 *
 * Bytes of dest after the terminator are left as they were (no zero padding);
 * callers only use the result as a C string.
 */
static void
stain_copy(char *dest, const char *in, size_t len)
{
	size_t n;

	if (!len)
		return;

	/* single bounded pass: copy and substitute together */
	for (n = 0; n < len - 1 && in[n]; n++)
		dest[n] = in[n] == '\t' ? '>' : in[n];

	dest[n] = '\0';
}
}
343+
316344
static int
317345
fixdiff_find_original(dp_t *pdp, int *line_start)
318346
{
319-
char in_src[4096], in_temp[4096], b1[256], b2[256], hit = 0;
347+
char in_src[4096], in_temp[4096], b1[256], b2[256], f1[256], f2[256], hit = 0;
320348
int ret = 1, mc = 0, lmc = 0, lis = 0, lg_lis = 0;
321349
lbuf_t lb_temp, lb_src, lb;
322350
size_t lt, ls;
@@ -329,6 +357,8 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
329357
lb_src.fd = lb.fd = -1;
330358
b1[0] = '\0';
331359
b2[0] = '\0';
360+
f1[0] = '\0';
361+
f2[0] = '\0';
332362

333363
init_lbuf(&lb_temp, "temp");
334364
lb_temp.fd = open(pdp->temp, OFLAGS(O_RDWR));
@@ -402,26 +432,91 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
402432
break;
403433

404434
if (!ls) {
405-
elog("failed to match, best chunk %d lines at %s:%d\n",
435+
elog("failed to match, best chunk %d lines at %s:%d (tabs shown below as >)\n",
406436
lmc, pdp->pf, lg_lis);
407-
elog("patch: '%s', source '%s'\n", b1, b2);
437+
elog("last match: patch: '%s', source: '%s'\n", b1, b2);
438+
elog("divergence: patch: '%s', source: '%s'\n", f1, f2);
408439
mc = 0;
409440
break;
410441
}
411442

412443
if (fixdiff_strcmp(in_temp + 1, lt - 1, &let, in_src, ls, &les)) {
413-
if (mc > pdp->pre + pdp->post)
414-
elog("match failed after %d: '%s' / '%s'", mc, in_temp + 1, in_src);
444+
/*
445+
* It's still possible we only differ by whitespace.
446+
* Does it match if we treat any whitespace as a single
447+
* whitespace match token?
448+
*/
449+
450+
char *p1 = in_temp + 1, *p1_end = p1 + lt - 1, *p2 = in_src, *p2_end = p2 + ls;
451+
452+
while (p1 < p1_end && p2 < p2_end) {
453+
char wst1 = 0, wst2 = 0;
454+
455+
while (*p1 == ' ' || *p1 == '\t' && p1 < p1_end) {
456+
p1++;
457+
wst1 = 1;
458+
}
459+
while (*p2 == ' ' || *p2 == '\t' && p2 < p2_end) {
460+
p2++;
461+
wst2 = 1;
462+
}
463+
464+
if (wst1 != wst2)
465+
goto record_breakage;
466+
467+
if (*p1 != *p2)
468+
goto record_breakage;
469+
470+
p1++;
471+
p2++;
472+
}
473+
474+
if ((p1 < p1_end) != (p2 < p2_end))
475+
goto record_breakage;
476+
477+
elog("(fixable whitespace-only difference at stanza line %d)\n", lb_temp.li);
478+
479+
{
480+
rewriter_t *rwt = malloc(sizeof(*rwt) + ls + 1);
481+
if (!rwt) {
482+
elog("OOM\n");
483+
return -1;
484+
}
485+
rwt->next = pdp->rewriter_head;
486+
pdp->rewriter_head = rwt;
487+
rwt->line = lb_temp.li;
488+
rwt->text = (char *)&rwt[1];
489+
rwt->text[0] = *in_temp;
490+
rwt->len = ls + 1;
491+
memcpy(rwt->text + 1, in_src, ls);
492+
}
493+
goto allow_match_ws;
494+
495+
record_breakage:
496+
if (mc + 1 > lmc) {
497+
stain_copy(f1, in_temp + 1, sizeof(f1));
498+
stain_copy(f2, in_src, sizeof(f2));
499+
}
415500
mc = 0;
501+
{
502+
rewriter_t *rwt = pdp->rewriter_head, *rwt1;
503+
504+
while (rwt) {
505+
rwt1 = rwt->next;
506+
free(rwt);
507+
rwt = rwt1;
508+
}
509+
510+
pdp->rewriter_head = NULL;
511+
}
416512
break;
417513
}
418514

515+
allow_match_ws:
419516
mc++;
420517
if (mc > lmc) {
421-
strncpy(b1, in_temp + 1, sizeof(b1) - 1);
422-
b1[sizeof(b1) - 1] = '\0';
423-
strncpy(b2, in_src + 1, sizeof(b2) - 1);
424-
b2[sizeof(b2) - 1] = '\0';
518+
stain_copy(b1, in_temp + 1, sizeof(b1));
519+
stain_copy(b2, in_src, sizeof(b2));
425520
lmc++;
426521
lg_lis = lis;
427522
}
@@ -512,8 +607,9 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
512607
static int
513608
fixdiff_stanza_end(dp_t *pdp)
514609
{
610+
int orig, nope = 0;
611+
lbuf_t lb_temp;
515612
char buf[256];
516-
int orig;
517613

518614
if (!pdp->ongoing)
519615
return 0;
@@ -554,21 +650,64 @@ fixdiff_stanza_end(dp_t *pdp)
554650

555651
/* dump the temp side-buffer into stdout */
556652

557-
lseek(pdp->fd_temp, pdp->flo, SEEK_SET);
653+
init_lbuf(&lb_temp, "lb_temp");
654+
lb_temp.fd = open(pdp->temp, OFLAGS(O_RDONLY));
655+
lseek(lb_temp.fd, pdp->flo, SEEK_SET);
656+
558657
while (1) {
559-
ssize_t l = read(pdp->fd_temp, buf, sizeof(buf));
658+
char buf[4096];
659+
ssize_t l = fixdiff_get_line(&lb_temp, buf, sizeof(buf));
660+
rewriter_t *rwt = pdp->rewriter_head;
661+
560662
if (!l)
561663
break;
562664

563-
if (write(1, buf, TO_POSLEN(l)) != (ssize_t)l) {
564-
pdp->reason = "failed to write to stdout";
565-
return 1;
665+
// elog("dumping %d (len %d)\n", (int)pdp->li_out, (int)l);
666+
667+
while (rwt) {
668+
// elog("%d %d\n", rwt->line, pdp->li_out);
669+
if (rwt->line == lb_temp.li /*pdp->li_out*/) /* we need to rewrite this line */
670+
break;
671+
672+
rwt = rwt->next;
673+
}
674+
675+
if (rwt) {
676+
// elog("rewriting '%.*s' to '%.*s'\n", (int)l, buf, (int)rwt->len, rwt->text);
677+
if (write(1, rwt->text, TO_POSLEN(rwt->len)) != (ssize_t)rwt->len) {
678+
pdp->reason = "failed to write to stdout";
679+
nope = 1;
680+
break;
681+
}
682+
} else {
683+
if (write(1, buf, TO_POSLEN(l)) != (ssize_t)l) {
684+
pdp->reason = "failed to write to stdout";
685+
nope = 1;
686+
break;
687+
}
566688
}
689+
690+
pdp->li_out++;
567691
}
568692

569-
close(pdp->fd_temp);
693+
{
694+
rewriter_t *rwt = pdp->rewriter_head, *rwt1;
695+
696+
while (rwt) {
697+
rwt1 = rwt->next;
698+
free(rwt);
699+
rwt = rwt1;
700+
}
701+
702+
pdp->rewriter_head = NULL;
703+
}
704+
705+
close(lb_temp.fd);
570706
pdp->fd_temp = -1;
571707

708+
if (nope)
709+
return 1;
710+
572711
/* track the effect stanza changes are having on line offsets */
573712
pdp->delta += pdp->post - pdp->pre;
574713

@@ -611,6 +750,7 @@ main(int argc, char *argv[])
611750
dp.d = DSS_WAIT_MMM;
612751
dp.lb.fd = 0; /* stdin */
613752
dp.fd_temp = -1;
753+
dp.li_out = 1;
614754

615755
while (1) {
616756
size_t l = fixdiff_get_line(&dp.lb, in, sizeof(in));

0 commit comments

Comments
 (0)