1+ //! Facilities to produce the unified diff format. 
12//! Originally based on https://github.com/pascalkuthe/imara-diff/pull/14. 
23//! 
34
@@ -25,21 +26,50 @@ impl ContextSize {
2526    } 
2627} 
2728
29+ /// A utility trait implemented by consumers of the hunks produced by [`UnifiedDiff`](super::UnifiedDiff). 
30+ pub  trait  ConsumeHunk  { 
31+     /// The item this instance produces after consuming all hunks. 
32+      type  Out ; 
33+ 
34+     /// Consume a single `hunk` in unified diff format, that would be prefixed with `header`. 
35+      /// Note that all newlines are already part of both `header` and `hunk`. 
36+      /// 
37+      /// Note that the [`UnifiedDiff`](super::UnifiedDiff) sink will wrap its output in an [`std::io::Result`]. 
38+      /// After this method has returned its first error, it will not be called anymore. 
39+      /// 
40+      /// The following hunk-related information is the same as what is encoded in the `header`. 
41+      /// * `before_hunk_start` is the 1-based first line of this hunk in the old file. 
42+      /// * `before_hunk_len` is the number of lines of this hunk in the old file. 
43+      /// * `after_hunk_start` is the 1-based first line of this hunk in the new file. 
44+      /// * `after_hunk_len` is the number of lines of this hunk in the new file. 
45+      fn  consume_hunk ( 
46+         & mut  self , 
47+         before_hunk_start :  u32 , 
48+         before_hunk_len :  u32 , 
49+         after_hunk_start :  u32 , 
50+         after_hunk_len :  u32 , 
51+         header :  & str , 
52+         hunk :  & [ u8 ] , 
53+     )  -> std:: io:: Result < ( ) > ; 
54+     /// Called after the last hunk was consumed to produce the final output. 
55+      fn  finish ( self )  -> Self :: Out ; 
56+ } 
57+ 
2858pub ( super )  mod  _impl { 
59+     use  super :: { ConsumeHunk ,  ContextSize } ; 
60+     use  bstr:: { ByteSlice ,  ByteVec } ; 
2961    use  imara_diff:: { intern,  Sink } ; 
30-     use  std :: fmt :: { Display ,   Write } ; 
62+     use  intern :: { InternedInput ,   Interner ,   Token } ; 
3163    use  std:: hash:: Hash ; 
64+     use  std:: io:: ErrorKind ; 
3265    use  std:: ops:: Range ; 
3366
34-     use  super :: ContextSize ; 
35-     use  intern:: { InternedInput ,  Interner ,  Token } ; 
36- 
37-     /// A [`Sink`] that creates a textual diff 
38-      /// in the format typically output by git or gnu-diff if the `-u` option is used 
39-      pub  struct  UnifiedDiff < ' a ,  W ,  T > 
67+     /// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used, 
68+      /// and passes it in full to a consumer. 
69+      pub  struct  UnifiedDiff < ' a ,  T ,  D > 
4070    where 
41-         W :   Write , 
42-         T :   Hash  +  Eq  +  Display , 
71+         T :   Hash  +  Eq  +  AsRef < [ u8 ] > , 
72+         D :   ConsumeHunk , 
4373    { 
4474        before :  & ' a  [ Token ] , 
4575        after :  & ' a  [ Token ] , 
@@ -53,85 +83,91 @@ pub(super) mod _impl {
5383        /// Symmetrical context before and after the changed hunk. 
5484         ctx_size :  u32 , 
5585
56-         buffer :  String , 
57-         dst :  W , 
86+         buffer :  Vec < u8 > , 
87+         header_buf :  String , 
88+         delegate :  D , 
89+         newline :  & ' a  str , 
90+ 
91+         err :  Option < std:: io:: Error > , 
5892    } 
5993
60-     impl < ' a ,  T >  UnifiedDiff < ' a ,  String ,   T > 
94+     impl < ' a ,  T ,   D >  UnifiedDiff < ' a ,  T ,   D > 
6195    where 
62-         T :  Hash  + Eq  + Display , 
96+         T :  Hash  + Eq  + AsRef < [ u8 ] > , 
97+         D :  ConsumeHunk , 
6398    { 
6499        /// Create a new `UnifiedDiffBuilder` for the given `input`, 
65100         /// displaying `context_size` lines of context around each change, 
66-          /// that will return a [`String`]. 
67-          pub  fn  new ( input :  & ' a  InternedInput < T > ,  context_size :  ContextSize )  -> Self  { 
101+          /// that will write it output to the provided implementation of [`Write`]. 
102+          /// 
103+          /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`, 
104+          pub  fn  new ( 
105+             input :  & ' a  InternedInput < T > , 
106+             consume_hunk :  D , 
107+             newline_separator :  & ' a  str , 
108+             context_size :  ContextSize , 
109+         )  -> Self  { 
68110            Self  { 
69111                before_hunk_start :  0 , 
70112                after_hunk_start :  0 , 
71113                before_hunk_len :  0 , 
72114                after_hunk_len :  0 , 
73-                 buffer :  String :: with_capacity ( 8 ) , 
74-                 dst :  String :: new ( ) , 
115+                 buffer :  Vec :: with_capacity ( 8 ) , 
116+                 header_buf :  String :: new ( ) , 
117+                 delegate :  consume_hunk, 
75118                interner :  & input. interner , 
76119                before :  & input. before , 
77120                after :  & input. after , 
78121                pos :  0 , 
79122                ctx_size :  context_size. symmetrical , 
80-             } 
81-         } 
82-     } 
123+                 newline :  newline_separator, 
83124
84-     impl < ' a ,  W ,  T >  UnifiedDiff < ' a ,  W ,  T > 
85-     where 
86-         W :  Write , 
87-         T :  Hash  + Eq  + Display , 
88-     { 
89-         /// Create a new `UnifiedDiffBuilder` for the given `input`, 
90-          /// displaying `context_size` lines of context around each change, 
91-          /// that will writes it output to the provided implementation of [`Write`]. 
92-          pub  fn  with_writer ( input :  & ' a  InternedInput < T > ,  writer :  W ,  context_size :  Option < u32 > )  -> Self  { 
93-             Self  { 
94-                 before_hunk_start :  0 , 
95-                 after_hunk_start :  0 , 
96-                 before_hunk_len :  0 , 
97-                 after_hunk_len :  0 , 
98-                 buffer :  String :: with_capacity ( 8 ) , 
99-                 dst :  writer, 
100-                 interner :  & input. interner , 
101-                 before :  & input. before , 
102-                 after :  & input. after , 
103-                 pos :  0 , 
104-                 ctx_size :  context_size. unwrap_or ( 3 ) , 
125+                 err :  None , 
105126            } 
106127        } 
107128
108129        fn  print_tokens ( & mut  self ,  tokens :  & [ Token ] ,  prefix :  char )  { 
109130            for  & token in  tokens { 
110-                 writeln ! ( & mut  self . buffer,  "{prefix}{}" ,  self . interner[ token] ) . unwrap ( ) ; 
131+                 self . buffer . push_char ( prefix) ; 
132+                 self . buffer . push_str ( & self . interner [ token] ) ; 
133+                 self . buffer . push_str ( self . newline . as_bytes ( ) ) ; 
111134            } 
112135        } 
113136
114-         fn  flush ( & mut  self )  { 
137+         fn  flush ( & mut  self )  -> std :: io :: Result < ( ) >   { 
115138            if  self . before_hunk_len  == 0  && self . after_hunk_len  == 0  { 
116-                 return ; 
139+                 return   Ok ( ( ) ) ; 
117140            } 
118141
119142            let  end = ( self . pos  + self . ctx_size ) . min ( self . before . len ( )  as  u32 ) ; 
120143            self . update_pos ( end,  end) ; 
121144
122-             writeln ! ( 
123-                 & mut  self . dst, 
124-                 "@@ -{},{} +{},{} @@" , 
145+             self . header_buf . clear ( ) ; 
146+ 
147+             std:: fmt:: Write :: write_fmt ( 
148+                 & mut  self . header_buf , 
149+                 format_args ! ( 
150+                     "@@ -{},{} +{},{} @@{nl}" , 
151+                     self . before_hunk_start + 1 , 
152+                     self . before_hunk_len, 
153+                     self . after_hunk_start + 1 , 
154+                     self . after_hunk_len, 
155+                     nl = self . newline
156+                 ) , 
157+             ) 
158+             . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other ,  err) ) ?; 
159+             self . delegate . consume_hunk ( 
125160                self . before_hunk_start  + 1 , 
126161                self . before_hunk_len , 
127162                self . after_hunk_start  + 1 , 
128163                self . after_hunk_len , 
129-             ) 
130-             . unwrap ( ) ; 
131-             write ! ( & mut   self . dst ,   "{}" ,   & self . buffer ) . unwrap ( ) ; 
164+                  & self . header_buf , 
165+                  & self . buffer , 
166+             ) ? ; 
132167            self . buffer . clear ( ) ; 
133168            self . before_hunk_len  = 0 ; 
134-             self . after_hunk_len  = 0 
169+             self . after_hunk_len  = 0 ; 
170+             Ok ( ( ) ) 
135171        } 
136172
137173        fn  update_pos ( & mut  self ,  print_to :  u32 ,  move_to :  u32 )  { 
@@ -143,18 +179,24 @@ pub(super) mod _impl {
143179        } 
144180    } 
145181
146-     impl < W ,   T >  Sink  for  UnifiedDiff < ' _ ,  W ,   T > 
182+     impl < T ,   D >  Sink  for  UnifiedDiff < ' _ ,  T ,   D > 
147183    where 
148-         W :   Write , 
149-         T :   Hash  +  Eq  +  Display , 
184+         T :   Hash  +  Eq  +  AsRef < [ u8 ] > , 
185+         D :   ConsumeHunk , 
150186    { 
151-         type  Out  = W ; 
187+         type  Out  = std :: io :: Result < D :: Out > ; 
152188
153189        fn  process_change ( & mut  self ,  before :  Range < u32 > ,  after :  Range < u32 > )  { 
190+             if  self . err . is_some ( )  { 
191+                 return ; 
192+             } 
154193            if  ( ( self . pos  == 0 )  && ( before. start  - self . pos  > self . ctx_size ) ) 
155194                || ( before. start  - self . pos  > 2  *  self . ctx_size ) 
156195            { 
157-                 self . flush ( ) ; 
196+                 if  let  Err ( err)  = self . flush ( )  { 
197+                     self . err  = Some ( err) ; 
198+                     return ; 
199+                 } 
158200                self . pos  = before. start  - self . ctx_size ; 
159201                self . before_hunk_start  = self . pos ; 
160202                self . after_hunk_start  = after. start  - self . ctx_size ; 
@@ -167,8 +209,46 @@ pub(super) mod _impl {
167209        } 
168210
169211        fn  finish ( mut  self )  -> Self :: Out  { 
170-             self . flush ( ) ; 
171-             self . dst 
212+             if  let  Err ( err)  = self . flush ( )  { 
213+                 self . err  = Some ( err) ; 
214+             } 
215+             if  let  Some ( err)  = self . err  { 
216+                 return  Err ( err) ; 
217+             } 
218+             Ok ( self . delegate . finish ( ) ) 
219+         } 
220+     } 
221+ 
222+     /// An implementation that fails if the input isn't UTF-8. 
223+      impl  ConsumeHunk  for  String  { 
224+         type  Out  = Self ; 
225+ 
226+         fn  consume_hunk ( & mut  self ,  _:  u32 ,  _:  u32 ,  _:  u32 ,  _:  u32 ,  header :  & str ,  hunk :  & [ u8 ] )  -> std:: io:: Result < ( ) >  { 
227+             self . push_str ( header) ; 
228+             self . push_str ( 
229+                 hunk. to_str ( ) 
230+                     . map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other ,  err) ) ?, 
231+             ) ; 
232+             Ok ( ( ) ) 
233+         } 
234+ 
235+         fn  finish ( self )  -> Self :: Out  { 
236+             self 
237+         } 
238+     } 
239+ 
240+     /// An implementation that writes hunks into a byte buffer. 
241+      impl  ConsumeHunk  for  Vec < u8 >  { 
242+         type  Out  = Self ; 
243+ 
244+         fn  consume_hunk ( & mut  self ,  _:  u32 ,  _:  u32 ,  _:  u32 ,  _:  u32 ,  header :  & str ,  hunk :  & [ u8 ] )  -> std:: io:: Result < ( ) >  { 
245+             self . push_str ( header) ; 
246+             self . push_str ( hunk) ; 
247+             Ok ( ( ) ) 
248+         } 
249+ 
250+         fn  finish ( self )  -> Self :: Out  { 
251+             self 
172252        } 
173253    } 
174254} 
0 commit comments