@@ -84,11 +84,21 @@ typedef struct {
8484 int li ;
8585} lbuf_t ;
8686
87+ typedef struct rewriter { /* one pending replacement for a stanza line, kept on a singly linked list */
88+ struct rewriter * next ; /* next entry in the list; new entries are pushed at pdp->rewriter_head */
89+ size_t len ; /* number of bytes at text that are written out in place of the stanza line */
90+ int line ; /* lb_temp.li value recorded when the whitespace-only difference was seen */
91+ char * text ; /* replacement bytes: first char of the patch line followed by the source line */
92+ } rewriter_t ;
93+ /* text is pointed just past this struct, into extra bytes of the same malloc() allocation */
94+
8795typedef struct {
8896 off_t flo ;
8997
9098 const char * reason ;
9199
100+ rewriter_t * rewriter_head ;
101+
92102 dss_t d ;
93103 int pre ;
94104 int post ;
@@ -102,6 +112,8 @@ typedef struct {
102112
103113 int fd_temp ;
104114
115+ int li_out ;
116+
105117 char ongoing ;
106118 char skip_this_one ;
107119 char lead_in_active ;
@@ -313,10 +325,26 @@ fixdiff_stanza_start(dp_t *pdp, char *sh, size_t len)
313325 return 0 ;
314326}
315327
/*
 * Copy `in` into `dest` for use in diagnostics, replacing every tab with
 * '>' so that tab-versus-space differences are visible in the log output.
 *
 * dest: destination buffer, at least `len` bytes
 * in:   NUL-terminated source string
 * len:  total size of `dest`, including room for the terminating NUL
 *
 * The result is always NUL-terminated and holds at most len - 1 characters
 * of `in`.  A zero `len` leaves `dest` untouched (the previous code
 * computed len - 1 unconditionally, which wraps for len == 0).
 */
static void
stain_copy(char *dest, const char *in, size_t len)
{
	size_t n;

	if (!len)
		return;

	/* single bounded pass: copy and stain tabs at the same time */
	for (n = 0; n < len - 1 && in[n]; n++)
		dest[n] = in[n] == '\t' ? '>' : in[n];

	dest[n] = '\0';
}
343+
316344static int
317345fixdiff_find_original (dp_t * pdp , int * line_start )
318346{
319- char in_src [4096 ], in_temp [4096 ], b1 [256 ], b2 [256 ], hit = 0 ;
347+ char in_src [4096 ], in_temp [4096 ], b1 [256 ], b2 [256 ], f1 [ 256 ], f2 [ 256 ], hit = 0 ;
320348 int ret = 1 , mc = 0 , lmc = 0 , lis = 0 , lg_lis = 0 ;
321349 lbuf_t lb_temp , lb_src , lb ;
322350 size_t lt , ls ;
@@ -329,6 +357,8 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
329357 lb_src .fd = lb .fd = -1 ;
330358 b1 [0 ] = '\0' ;
331359 b2 [0 ] = '\0' ;
360+ f1 [0 ] = '\0' ;
361+ f2 [0 ] = '\0' ;
332362
333363 init_lbuf (& lb_temp , "temp" );
334364 lb_temp .fd = open (pdp -> temp , OFLAGS (O_RDWR ));
@@ -402,26 +432,92 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
402432 break ;
403433
404434 if (!ls ) {
405- elog ("failed to match, best chunk %d lines at %s:%d\n" ,
435+ elog ("failed to match, best chunk %d lines at %s:%d (tabs shown below as >) \n" ,
406436 lmc , pdp -> pf , lg_lis );
407- elog ("patch: '%s', source '%s'\n" , b1 , b2 );
437+ elog ("last match: patch: '%s', source: '%s'\n" , b1 , b2 );
438+ elog ("divergence: patch: '%s', source: '%s'\n" , f1 , f2 );
408439 mc = 0 ;
409440 break ;
410441 }
411442
412443 if (fixdiff_strcmp (in_temp + 1 , lt - 1 , & let , in_src , ls , & les )) {
413- if (mc > pdp -> pre + pdp -> post )
414- elog ("match failed after %d: '%s' / '%s'" , mc , in_temp + 1 , in_src );
444+ /*
445+ * It's still possible we only differ by whitespace.
446+ * Does it match if we treat any whitespace as a single
447+ * whitespace match token?
448+ */
449+
450+ char * p1 = in_temp + 1 , * p1_end = p1 + lt - 1 - (int )let ,
451+ * p2 = in_src , * p2_end = p2 + ls - (int )les ;
452+
453+ while (p1 < p1_end && p2 < p2_end ) {
454+ char wst1 = 0 , wst2 = 0 ;
455+
456+ while (* p1 == ' ' || * p1 == '\t' && p1 < p1_end ) {
457+ p1 ++ ;
458+ wst1 = 1 ;
459+ }
460+ while (* p2 == ' ' || * p2 == '\t' && p2 < p2_end ) {
461+ p2 ++ ;
462+ wst2 = 1 ;
463+ }
464+
465+ if (wst1 != wst2 )
466+ goto record_breakage ;
467+
468+ if (* p1 != * p2 )
469+ goto record_breakage ;
470+
471+ p1 ++ ;
472+ p2 ++ ;
473+ }
474+
475+ if ((p1 < p1_end ) != (p2 < p2_end ))
476+ goto record_breakage ;
477+
478+ elog ("(fixable whitespace-only difference at stanza line %d)\n" , lb_temp .li );
479+
480+ {
481+ rewriter_t * rwt = malloc (sizeof (* rwt ) + ls + 1 );
482+ if (!rwt ) {
483+ elog ("OOM\n" );
484+ return -1 ;
485+ }
486+ rwt -> next = pdp -> rewriter_head ;
487+ pdp -> rewriter_head = rwt ;
488+ rwt -> line = lb_temp .li ;
489+ rwt -> text = (char * )& rwt [1 ];
490+ rwt -> text [0 ] = * in_temp ;
491+ rwt -> len = ls + 1 ;
492+ memcpy (rwt -> text + 1 , in_src , ls );
493+ }
494+ goto allow_match_ws ;
495+
496+ record_breakage :
497+ if (mc + 1 > lmc ) {
498+ stain_copy (f1 , in_temp + 1 , sizeof (f1 ));
499+ stain_copy (f2 , in_src , sizeof (f2 ));
500+ }
415501 mc = 0 ;
502+ {
503+ rewriter_t * rwt = pdp -> rewriter_head , * rwt1 ;
504+
505+ while (rwt ) {
506+ rwt1 = rwt -> next ;
507+ free (rwt );
508+ rwt = rwt1 ;
509+ }
510+
511+ pdp -> rewriter_head = NULL ;
512+ }
416513 break ;
417514 }
418515
516+ allow_match_ws :
419517 mc ++ ;
420518 if (mc > lmc ) {
421- strncpy (b1 , in_temp + 1 , sizeof (b1 ) - 1 );
422- b1 [sizeof (b1 ) - 1 ] = '\0' ;
423- strncpy (b2 , in_src + 1 , sizeof (b2 ) - 1 );
424- b2 [sizeof (b2 ) - 1 ] = '\0' ;
519+ stain_copy (b1 , in_temp + 1 , sizeof (b1 ));
520+ stain_copy (b2 , in_src , sizeof (b2 ));
425521 lmc ++ ;
426522 lg_lis = lis ;
427523 }
@@ -512,8 +608,9 @@ fixdiff_find_original(dp_t *pdp, int *line_start)
512608static int
513609fixdiff_stanza_end (dp_t * pdp )
514610{
611+ int orig , nope = 0 ;
612+ lbuf_t lb_temp ;
515613 char buf [256 ];
516- int orig ;
517614
518615 if (!pdp -> ongoing )
519616 return 0 ;
@@ -554,21 +651,64 @@ fixdiff_stanza_end(dp_t *pdp)
554651
555652 /* dump the temp side-buffer into stdout */
556653
557- lseek (pdp -> fd_temp , pdp -> flo , SEEK_SET );
654+ init_lbuf (& lb_temp , "lb_temp" );
655+ lb_temp .fd = open (pdp -> temp , OFLAGS (O_RDONLY ));
656+ lseek (lb_temp .fd , pdp -> flo , SEEK_SET );
657+
558658 while (1 ) {
559- ssize_t l = read (pdp -> fd_temp , buf , sizeof (buf ));
659+ char buf [4096 ];
660+ ssize_t l = fixdiff_get_line (& lb_temp , buf , sizeof (buf ));
661+ rewriter_t * rwt = pdp -> rewriter_head ;
662+
560663 if (!l )
561664 break ;
562665
563- if (write (1 , buf , TO_POSLEN (l )) != (ssize_t )l ) {
564- pdp -> reason = "failed to write to stdout" ;
565- return 1 ;
666+ // elog("dumping %d (len %d)\n", (int)pdp->li_out, (int)l);
667+
668+ while (rwt ) {
669+ // elog("%d %d\n", rwt->line, pdp->li_out);
670+ if (rwt -> line == lb_temp .li /*pdp->li_out*/ ) /* we need to rewrite this line */
671+ break ;
672+
673+ rwt = rwt -> next ;
674+ }
675+
676+ if (rwt ) {
677+ // elog("rewriting '%.*s' to '%.*s'\n", (int)l, buf, (int)rwt->len, rwt->text);
678+ if (write (1 , rwt -> text , TO_POSLEN (rwt -> len )) != (ssize_t )rwt -> len ) {
679+ pdp -> reason = "failed to write to stdout" ;
680+ nope = 1 ;
681+ break ;
682+ }
683+ } else {
684+ if (write (1 , buf , TO_POSLEN (l )) != (ssize_t )l ) {
685+ pdp -> reason = "failed to write to stdout" ;
686+ nope = 1 ;
687+ break ;
688+ }
566689 }
690+
691+ pdp -> li_out ++ ;
567692 }
568693
569- close (pdp -> fd_temp );
694+ {
695+ rewriter_t * rwt = pdp -> rewriter_head , * rwt1 ;
696+
697+ while (rwt ) {
698+ rwt1 = rwt -> next ;
699+ free (rwt );
700+ rwt = rwt1 ;
701+ }
702+
703+ pdp -> rewriter_head = NULL ;
704+ }
705+
706+ close (lb_temp .fd );
570707 pdp -> fd_temp = -1 ;
571708
709+ if (nope )
710+ return 1 ;
711+
572712 /* track the effect stanza changes are having on line offsets */
573713 pdp -> delta += pdp -> post - pdp -> pre ;
574714
@@ -611,6 +751,7 @@ main(int argc, char *argv[])
611751 dp .d = DSS_WAIT_MMM ;
612752 dp .lb .fd = 0 ; /* stdin */
613753 dp .fd_temp = -1 ;
754+ dp .li_out = 1 ;
614755
615756 while (1 ) {
616757 size_t l = fixdiff_get_line (& dp .lb , in , sizeof (in ));
0 commit comments