Skip to content

Commit 9be5182

Browse files
committed
rewrite whitespace-only
1 parent 2e5ad07 commit 9be5182

File tree

4 files changed

+980
-16
lines changed

4 files changed

+980
-16
lines changed

CMakeLists.txt

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -84,4 +84,14 @@ add_test(NAME lws5
8484
-P ${CMAKE_CURRENT_SOURCE_DIR}/tests/runtest.cmake
8585
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/5)
8686

87+
# Test lws6: runs tests/runtest.cmake in script mode (-P) with tests/6 as the
# working directory, passing it the built ${PROJECT_NAME} binary as CMD,
# b-comms.c as SRC and gemini.patch as PATCH.
# NOTE(review): EXPSHA / EXPSHA_WIN are presumably the expected result hashes
# for non-Windows / Windows runs -- confirm against tests/runtest.cmake.
add_test(NAME lws6
88+
COMMAND ${CMAKE_COMMAND}
89+
-DCMD=$<TARGET_FILE:${PROJECT_NAME}>
90+
-DSRC=b-comms.c
91+
-DPATCH=gemini.patch
92+
-DEXPSHA=6ea83a67aba0358099752cfaf83a28d5d983b50855e93352ae9c04d656c7911e
93+
-DEXPSHA_WIN=2e6b9b12ae0128c9edfc109744b9c67848712b0521c322a45104895aa4cbc3b1
94+
-P ${CMAKE_CURRENT_SOURCE_DIR}/tests/runtest.cmake
95+
WORKING_DIRECTORY ${PROJECT_SOURCE_DIR}/tests/6)
96+
8797

fixdiff.c

Lines changed: 157 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -84,11 +84,21 @@ typedef struct {
8484
int li;
8585
} lbuf_t;
8686

87+
/*
 * One pending replacement for a stanza line whose only difference from the
 * source is whitespace.  Entries are pushed onto the head of the list held
 * in the dp_t's rewriter_head and consumed when the stanza is written out.
 */
typedef struct rewriter {
88+
struct rewriter *next; /* next entry in the singly-linked list, or NULL */
89+
size_t len; /* number of bytes at text to write out (not NUL-terminated) */
90+
int line; /* temp-file line number (lbuf li) this replacement applies to */
91+
char *text; /* replacement line: the patch line's first byte, then the source line */
92+
} rewriter_t;
93+
/* the storage that text points to is over-allocated directly after this struct */
94+
8795
typedef struct {
8896
off_t flo;
8997

9098
const char *reason;
9199

100+
rewriter_t *rewriter_head;
101+
92102
dss_t d;
93103
int pre;
94104
int post;
@@ -102,6 +112,8 @@ typedef struct {
102112

103113
int fd_temp;
104114

115+
int li_out;
116+
105117
char ongoing;
106118
char skip_this_one;
107119
char lead_in_active;
@@ -313,10 +325,26 @@ fixdiff_stanza_start(dp_t *pdp, char *sh, size_t len)
313325
return 0;
314326
}
315327

328+
/*
 * stain_copy() - bounded string copy for diagnostics, with tabs made visible
 *
 * dest: destination buffer
 * in:   source string; reading stops at its NUL or after len - 1 bytes,
 *       whichever comes first
 * len:  total size of dest in bytes, including room for the terminating NUL
 *
 * Copies at most len - 1 bytes from in to dest, writing '>' wherever the
 * source holds a tab, and always NUL-terminates dest.
 *
 * A len of zero means there is no room even for the terminator, so dest is
 * left untouched; without this guard len - 1 would wrap around to SIZE_MAX
 * and the copy would run far past the end of dest.
 */
static void
stain_copy(char *dest, const char *in, size_t len)
{
	size_t n;

	if (!len)
		return;

	/* single pass: copy and substitute tabs at the same time */
	for (n = 0; n < len - 1 && in[n]; n++)
		dest[n] = in[n] == '\t' ? '>' : in[n];

	dest[n] = '\0';
}
343+
316344
static int
317345
fixdiff_find_original(dp_t *pdp, int *line_start)
318346
{
319-
char in_src[4096], in_temp[4096], b1[256], b2[256], hit = 0;
347+
char in_src[4096], in_temp[4096], b1[256], b2[256], f1[256], f2[256], hit = 0;
320348
int ret = 1, mc = 0, lmc = 0, lis = 0, lg_lis = 0;
321349
lbuf_t lb_temp, lb_src, lb;
322350
size_t lt, ls;
@@ -329,6 +357,8 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
329357
lb_src.fd = lb.fd = -1;
330358
b1[0] = '\0';
331359
b2[0] = '\0';
360+
f1[0] = '\0';
361+
f2[0] = '\0';
332362

333363
init_lbuf(&lb_temp, "temp");
334364
lb_temp.fd = open(pdp->temp, OFLAGS(O_RDWR));
@@ -402,26 +432,92 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
402432
break;
403433

404434
if (!ls) {
405-
elog("failed to match, best chunk %d lines at %s:%d\n",
435+
elog("failed to match, best chunk %d lines at %s:%d (tabs shown below as >)\n",
406436
lmc, pdp->pf, lg_lis);
407-
elog("patch: '%s', source '%s'\n", b1, b2);
437+
elog("last match: patch: '%s', source: '%s'\n", b1, b2);
438+
elog("divergence: patch: '%s', source: '%s'\n", f1, f2);
408439
mc = 0;
409440
break;
410441
}
411442

412443
if (fixdiff_strcmp(in_temp + 1, lt - 1, &let, in_src, ls, &les)) {
413-
if (mc > pdp->pre + pdp->post)
414-
elog("match failed after %d: '%s' / '%s'", mc, in_temp + 1, in_src);
444+
/*
445+
* It's still possible we only differ by whitespace.
446+
* Does it match if we treat any whitespace as a single
447+
* whitespace match token?
448+
*/
449+
450+
char *p1 = in_temp + 1, *p1_end = p1 + lt - 1 - (int)let,
451+
*p2 = in_src, *p2_end = p2 + ls - (int)les;
452+
453+
while (p1 < p1_end && p2 < p2_end) {
454+
char wst1 = 0, wst2 = 0;
455+
456+
while (*p1 == ' ' || *p1 == '\t' && p1 < p1_end) {
457+
p1++;
458+
wst1 = 1;
459+
}
460+
while (*p2 == ' ' || *p2 == '\t' && p2 < p2_end) {
461+
p2++;
462+
wst2 = 1;
463+
}
464+
465+
if (wst1 != wst2)
466+
goto record_breakage;
467+
468+
if (*p1 != *p2)
469+
goto record_breakage;
470+
471+
p1++;
472+
p2++;
473+
}
474+
475+
if ((p1 < p1_end) != (p2 < p2_end))
476+
goto record_breakage;
477+
478+
elog("(fixable whitespace-only difference at stanza line %d)\n", lb_temp.li);
479+
480+
{
481+
rewriter_t *rwt = malloc(sizeof(*rwt) + ls + 1);
482+
if (!rwt) {
483+
elog("OOM\n");
484+
return -1;
485+
}
486+
rwt->next = pdp->rewriter_head;
487+
pdp->rewriter_head = rwt;
488+
rwt->line = lb_temp.li;
489+
rwt->text = (char *)&rwt[1];
490+
rwt->text[0] = *in_temp;
491+
rwt->len = ls + 1;
492+
memcpy(rwt->text + 1, in_src, ls);
493+
}
494+
goto allow_match_ws;
495+
496+
record_breakage:
497+
if (mc + 1 > lmc) {
498+
stain_copy(f1, in_temp + 1, sizeof(f1));
499+
stain_copy(f2, in_src, sizeof(f2));
500+
}
415501
mc = 0;
502+
{
503+
rewriter_t *rwt = pdp->rewriter_head, *rwt1;
504+
505+
while (rwt) {
506+
rwt1 = rwt->next;
507+
free(rwt);
508+
rwt = rwt1;
509+
}
510+
511+
pdp->rewriter_head = NULL;
512+
}
416513
break;
417514
}
418515

516+
allow_match_ws:
419517
mc++;
420518
if (mc > lmc) {
421-
strncpy(b1, in_temp + 1, sizeof(b1) - 1);
422-
b1[sizeof(b1) - 1] = '\0';
423-
strncpy(b2, in_src + 1, sizeof(b2) - 1);
424-
b2[sizeof(b2) - 1] = '\0';
519+
stain_copy(b1, in_temp + 1, sizeof(b1));
520+
stain_copy(b2, in_src, sizeof(b2));
425521
lmc++;
426522
lg_lis = lis;
427523
}
@@ -512,8 +608,9 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
512608
static int
513609
fixdiff_stanza_end(dp_t *pdp)
514610
{
611+
int orig, nope = 0;
612+
lbuf_t lb_temp;
515613
char buf[256];
516-
int orig;
517614

518615
if (!pdp->ongoing)
519616
return 0;
@@ -554,21 +651,64 @@ fixdiff_stanza_end(dp_t *pdp)
554651

555652
/* dump the temp side-buffer into stdout */
556653

557-
lseek(pdp->fd_temp, pdp->flo, SEEK_SET);
654+
init_lbuf(&lb_temp, "lb_temp");
655+
lb_temp.fd = open(pdp->temp, OFLAGS(O_RDONLY));
656+
lseek(lb_temp.fd, pdp->flo, SEEK_SET);
657+
558658
while (1) {
559-
ssize_t l = read(pdp->fd_temp, buf, sizeof(buf));
659+
char buf[4096];
660+
ssize_t l = fixdiff_get_line(&lb_temp, buf, sizeof(buf));
661+
rewriter_t *rwt = pdp->rewriter_head;
662+
560663
if (!l)
561664
break;
562665

563-
if (write(1, buf, TO_POSLEN(l)) != (ssize_t)l) {
564-
pdp->reason = "failed to write to stdout";
565-
return 1;
666+
// elog("dumping %d (len %d)\n", (int)pdp->li_out, (int)l);
667+
668+
while (rwt) {
669+
// elog("%d %d\n", rwt->line, pdp->li_out);
670+
if (rwt->line == lb_temp.li /*pdp->li_out*/) /* we need to rewrite this line */
671+
break;
672+
673+
rwt = rwt->next;
674+
}
675+
676+
if (rwt) {
677+
// elog("rewriting '%.*s' to '%.*s'\n", (int)l, buf, (int)rwt->len, rwt->text);
678+
if (write(1, rwt->text, TO_POSLEN(rwt->len)) != (ssize_t)rwt->len) {
679+
pdp->reason = "failed to write to stdout";
680+
nope = 1;
681+
break;
682+
}
683+
} else {
684+
if (write(1, buf, TO_POSLEN(l)) != (ssize_t)l) {
685+
pdp->reason = "failed to write to stdout";
686+
nope = 1;
687+
break;
688+
}
566689
}
690+
691+
pdp->li_out++;
567692
}
568693

569-
close(pdp->fd_temp);
694+
{
695+
rewriter_t *rwt = pdp->rewriter_head, *rwt1;
696+
697+
while (rwt) {
698+
rwt1 = rwt->next;
699+
free(rwt);
700+
rwt = rwt1;
701+
}
702+
703+
pdp->rewriter_head = NULL;
704+
}
705+
706+
close(lb_temp.fd);
570707
pdp->fd_temp = -1;
571708

709+
if (nope)
710+
return 1;
711+
572712
/* track the effect stanza changes are having on line offsets */
573713
pdp->delta += pdp->post - pdp->pre;
574714

@@ -611,6 +751,7 @@ main(int argc, char *argv[])
611751
dp.d = DSS_WAIT_MMM;
612752
dp.lb.fd = 0; /* stdin */
613753
dp.fd_temp = -1;
754+
dp.li_out = 1;
614755

615756
while (1) {
616757
size_t l = fixdiff_get_line(&dp.lb, in, sizeof(in));

0 commit comments

Comments
 (0)