@@ -26,6 +26,35 @@ impl ContextSize {
26
26
}
27
27
}
28
28
29
/// Classifies a single line of a unified diff.
#[derive(Debug, Clone, Copy, PartialEq, Eq)]
pub enum DiffLineType {
    /// An unchanged line that is present in both the old and the new version.
    Context,
    /// A line that is only present in the new version.
    Add,
    /// A line that is only present in the old version.
    Remove,
}

impl DiffLineType {
    /// The marker that starts a line of this type, as a `char`.
    ///
    /// This is always the ASCII byte returned by `to_byte_prefix()`, widened to a `char`.
    fn to_prefix(self) -> char {
        char::from(self.to_byte_prefix())
    }

    /// The marker that starts a line of this type, as a single ASCII byte:
    /// a space for context, `+` for additions and `-` for removals.
    fn to_byte_prefix(self) -> u8 {
        match self {
            Self::Context => b' ',
            Self::Add => b'+',
            Self::Remove => b'-',
        }
    }
}
57
+
29
58
/// Specify where to put a newline.
30
59
#[ derive( Debug , Copy , Clone ) ]
31
60
pub enum NewlineSeparator < ' a > {
@@ -39,6 +68,31 @@ pub enum NewlineSeparator<'a> {
39
68
AfterHeaderAndWhenNeeded ( & ' a str ) ,
40
69
}
41
70
71
+ /// A utility trait for use in [`UnifiedDiffSink`](super::UnifiedDiffSink).
72
+ pub trait ConsumeTypedHunk {
73
+ /// The item this instance produces once all hunks were consumed,
74
+ /// i.e. the value returned by [`finish()`](Self::finish).
75
+ type Out ;
76
+
77
+ /// Consume one hunk whose `lines` are tagged with their [`DiffLineType`] instead of a textual prefix.
78
+ /// The `before_*` values describe the hunk in the old version, the `after_*` values in the new one.
79
+ /// TODO: How do we want to pass the header to `consume_hunk`? We can add an additional parameter
80
+ /// similar to `ConsumeHunk::consume_hunk` or add `DiffLineType::Header`, in which case we
81
+ /// wouldn't have to add an additional parameter.
82
+ fn consume_hunk (
83
+ & mut self ,
84
+ before_hunk_start : u32 ,
85
+ before_hunk_len : u32 ,
86
+ after_hunk_start : u32 ,
87
+ after_hunk_len : u32 ,
88
+ header : & str ,
89
+ lines : & [ ( DiffLineType , & [ u8 ] ) ] ,
90
+ ) -> std:: io:: Result < ( ) > ;
91
+
92
+ /// Called when processing is complete, consuming the instance to produce [`Self::Out`].
93
+ fn finish ( self ) -> Self :: Out ;
94
+ }
95
+
42
96
/// A utility trait for use in [`UnifiedDiff`](super::UnifiedDiff).
43
97
pub trait ConsumeHunk {
44
98
/// The item this instance produces after consuming all hunks.
@@ -75,18 +129,13 @@ pub(super) mod _impl {
75
129
use imara_diff:: { intern, Sink } ;
76
130
use intern:: { InternedInput , Interner , Token } ;
77
131
78
- use super :: { ConsumeHunk , ContextSize , NewlineSeparator } ;
79
-
80
- const CONTEXT : char = ' ' ;
81
- const ADDITION : char = '+' ;
82
- const REMOVAL : char = '-' ;
132
+ use super :: { ConsumeHunk , ConsumeTypedHunk , ContextSize , DiffLineType , NewlineSeparator } ;
83
133
84
- /// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used,
85
- /// and passes it in full to a consumer.
86
- pub struct UnifiedDiff < ' a , T , D >
134
+ /// A [`Sink`] that creates a unified diff and processes it hunk-by-hunk with structured type information.
135
+ pub struct UnifiedDiffSink < ' a , T , D >
87
136
where
88
137
T : Hash + Eq + AsRef < [ u8 ] > ,
89
- D : ConsumeHunk ,
138
+ D : ConsumeTypedHunk ,
90
139
{
91
140
before : & ' a [ Token ] ,
92
141
after : & ' a [ Token ] ,
@@ -106,26 +155,31 @@ pub(super) mod _impl {
106
155
107
156
/// Symmetrical context before and after the changed hunk.
108
157
ctx_size : u32 ,
158
+ // TODO:
159
+ // Is there a way to remove `newline` from `UnifiedDiffSink` as it is purely
160
+ // formatting-related?
161
+ // One option would be to introduce `HunkHeader` with a method `format_header` that could
162
+ // then be called outside `UnifiedDiffSink`, potentially taking `newline` as an argument.
109
163
newline : NewlineSeparator < ' a > ,
110
164
111
- buffer : Vec < u8 > ,
165
+ buffer : Vec < ( DiffLineType , Vec < u8 > ) > ,
112
166
header_buf : String ,
113
167
delegate : D ,
114
168
115
169
err : Option < std:: io:: Error > ,
116
170
}
117
171
118
- impl < ' a , T , D > UnifiedDiff < ' a , T , D >
172
+ impl < ' a , T , D > UnifiedDiffSink < ' a , T , D >
119
173
where
120
174
T : Hash + Eq + AsRef < [ u8 ] > ,
121
- D : ConsumeHunk ,
175
+ D : ConsumeTypedHunk ,
122
176
{
123
- /// Create a new instance to create unified diff using the lines in `input`,
177
+ /// Create a new instance to create a unified diff using the lines in `input`,
124
178
/// which also must be used when running the diff algorithm.
125
179
/// `context_size` is the amount of lines around each hunk which will be passed
126
- ///to `consume_hunk` .
180
+ /// to the sink .
127
181
///
128
- /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator` .
182
+ /// The sink's `consume_hunk` method is called for each hunk with structured type information .
129
183
pub fn new (
130
184
input : & ' a InternedInput < T > ,
131
185
consume_hunk : D ,
@@ -154,21 +208,10 @@ pub(super) mod _impl {
154
208
}
155
209
}
156
210
157
- fn print_tokens ( & mut self , tokens : & [ Token ] , prefix : char ) {
211
+ fn print_tokens ( & mut self , tokens : & [ Token ] , line_type : DiffLineType ) {
158
212
for & token in tokens {
159
- self . buffer . push_char ( prefix) ;
160
- let line = & self . interner [ token] ;
161
- self . buffer . push_str ( line) ;
162
- match self . newline {
163
- NewlineSeparator :: AfterHeaderAndLine ( nl) => {
164
- self . buffer . push_str ( nl) ;
165
- }
166
- NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => {
167
- if !line. as_ref ( ) . ends_with_str ( nl) {
168
- self . buffer . push_str ( nl) ;
169
- }
170
- }
171
- }
213
+ let content = self . interner [ token] . as_ref ( ) . to_vec ( ) ;
214
+ self . buffer . push ( ( line_type, content) ) ;
172
215
}
173
216
}
174
217
@@ -200,21 +243,36 @@ pub(super) mod _impl {
200
243
) ,
201
244
)
202
245
. map_err ( |err| std:: io:: Error :: new ( ErrorKind :: Other , err) ) ?;
246
+
247
+ // TODO:
248
+ // Is this explicit conversion necessary?
249
+ // Is the comment necessary?
250
+ // Convert Vec<(DiffLineType, Vec<u8>)> to Vec<(DiffLineType, &[u8])>
251
+ let lines: Vec < ( DiffLineType , & [ u8 ] ) > = self
252
+ . buffer
253
+ . iter ( )
254
+ . map ( |( line_type, content) | ( * line_type, content. as_slice ( ) ) )
255
+ . collect ( ) ;
256
+
203
257
self . delegate . consume_hunk (
204
258
hunk_start,
205
259
self . before_hunk_len ,
206
260
hunk_end,
207
261
self . after_hunk_len ,
208
262
& self . header_buf ,
209
- & self . buffer ,
263
+ & lines ,
210
264
) ?;
211
265
212
266
self . reset_hunks ( ) ;
213
267
Ok ( ( ) )
214
268
}
215
269
216
270
fn print_context_and_update_pos ( & mut self , print : Range < u32 > , move_to : u32 ) {
217
- self . print_tokens ( & self . before [ print. start as usize ..print. end as usize ] , CONTEXT ) ;
271
+ self . print_tokens (
272
+ & self . before [ print. start as usize ..print. end as usize ] ,
273
+ DiffLineType :: Context ,
274
+ ) ;
275
+
218
276
let len = print. end - print. start ;
219
277
self . ctx_pos = Some ( move_to) ;
220
278
self . before_hunk_len += len;
@@ -232,10 +290,10 @@ pub(super) mod _impl {
232
290
}
233
291
}
234
292
235
- impl < T , D > Sink for UnifiedDiff < ' _ , T , D >
293
+ impl < T , D > Sink for UnifiedDiffSink < ' _ , T , D >
236
294
where
237
295
T : Hash + Eq + AsRef < [ u8 ] > ,
238
- D : ConsumeHunk ,
296
+ D : ConsumeTypedHunk ,
239
297
{
240
298
type Out = std:: io:: Result < D :: Out > ;
241
299
@@ -270,8 +328,11 @@ pub(super) mod _impl {
270
328
self . before_hunk_len += before. end - before. start ;
271
329
self . after_hunk_len += after. end - after. start ;
272
330
273
- self . print_tokens ( & self . before [ before. start as usize ..before. end as usize ] , REMOVAL ) ;
274
- self . print_tokens ( & self . after [ after. start as usize ..after. end as usize ] , ADDITION ) ;
331
+ self . print_tokens (
332
+ & self . before [ before. start as usize ..before. end as usize ] ,
333
+ DiffLineType :: Remove ,
334
+ ) ;
335
+ self . print_tokens ( & self . after [ after. start as usize ..after. end as usize ] , DiffLineType :: Add ) ;
275
336
}
276
337
277
338
fn finish ( mut self ) -> Self :: Out {
@@ -285,6 +346,95 @@ pub(super) mod _impl {
285
346
}
286
347
}
287
348
349
+ /// A [`Sink`] that creates a textual diff in the format typically output by git or `gnu-diff` if the `-u` option is used,
350
+ /// and passes it in full to a consumer.
351
+ pub struct UnifiedDiff < ' a , D >
352
+ where
353
+ D : ConsumeHunk ,
354
+ {
355
+ delegate : D ,
356
+ newline : NewlineSeparator < ' a > ,
357
+ buffer : Vec < u8 > ,
358
+ }
359
+
360
+ impl < ' a , D > UnifiedDiff < ' a , D >
361
+ where
362
+ D : ConsumeHunk ,
363
+ {
364
+ /// Create a new instance to create a unified diff using the lines in `input`,
365
+ /// which also must be used when running the diff algorithm.
366
+ /// `context_size` is the amount of lines around each hunk which will be passed
367
+ /// to `consume_hunk`.
368
+ ///
369
+ /// `consume_hunk` is called for each hunk in unified-diff format, as created from each line separated by `newline_separator`.
370
+ pub fn new < T > (
371
+ input : & ' a InternedInput < T > ,
372
+ consume_hunk : D ,
373
+ newline_separator : NewlineSeparator < ' a > ,
374
+ context_size : ContextSize ,
375
+ ) -> UnifiedDiffSink < ' a , T , Self >
376
+ where
377
+ T : Hash + Eq + AsRef < [ u8 ] > ,
378
+ {
379
+ let formatter = Self {
380
+ delegate : consume_hunk,
381
+ newline : newline_separator,
382
+ buffer : Vec :: new ( ) ,
383
+ } ;
384
+ // TODO:
385
+ // Should this return a `UnifiedDiff` instead of a `UnifiedDiffSink`?
386
+ UnifiedDiffSink :: new ( input, formatter, newline_separator, context_size)
387
+ }
388
+
389
+ fn format_line ( & mut self , line_type : DiffLineType , content : & [ u8 ] ) {
390
+ self . buffer . push ( line_type. to_byte_prefix ( ) ) ;
391
+ self . buffer . push_str ( content) ;
392
+ match self . newline {
393
+ NewlineSeparator :: AfterHeaderAndLine ( nl) => {
394
+ self . buffer . push_str ( nl) ;
395
+ }
396
+ NewlineSeparator :: AfterHeaderAndWhenNeeded ( nl) => {
397
+ if !content. ends_with_str ( nl) {
398
+ self . buffer . push_str ( nl) ;
399
+ }
400
+ }
401
+ }
402
+ }
403
+ }
404
+
405
+ impl < D : ConsumeHunk > ConsumeTypedHunk for UnifiedDiff < ' _ , D > {
406
+ type Out = D :: Out ;
407
+
408
+ fn consume_hunk (
409
+ & mut self ,
410
+ before_hunk_start : u32 ,
411
+ before_hunk_len : u32 ,
412
+ after_hunk_start : u32 ,
413
+ after_hunk_len : u32 ,
414
+ header : & str ,
415
+ lines : & [ ( DiffLineType , & [ u8 ] ) ] ,
416
+ ) -> std:: io:: Result < ( ) > {
417
+ self . buffer . clear ( ) ;
418
+
419
+ for & ( line_type, content) in lines {
420
+ self . format_line ( line_type, content) ;
421
+ }
422
+
423
+ self . delegate . consume_hunk (
424
+ before_hunk_start,
425
+ before_hunk_len,
426
+ after_hunk_start,
427
+ after_hunk_len,
428
+ & header,
429
+ & self . buffer ,
430
+ )
431
+ }
432
+
433
+ fn finish ( self ) -> Self :: Out {
434
+ self . delegate . finish ( )
435
+ }
436
+ }
437
+
288
438
/// An implementation that fails if the input isn't UTF-8.
289
439
impl ConsumeHunk for String {
290
440
type Out = Self ;
@@ -317,4 +467,58 @@ pub(super) mod _impl {
317
467
self
318
468
}
319
469
}
470
+
471
+ impl ConsumeTypedHunk for String {
472
+ type Out = Self ;
473
+
474
+ fn consume_hunk (
475
+ & mut self ,
476
+ _: u32 ,
477
+ _: u32 ,
478
+ _: u32 ,
479
+ _: u32 ,
480
+ header : & str ,
481
+ lines : & [ ( DiffLineType , & [ u8 ] ) ] ,
482
+ ) -> std:: io:: Result < ( ) > {
483
+ self . push_str ( header) ;
484
+ for & ( line_type, content) in lines {
485
+ self . push ( line_type. to_prefix ( ) ) ;
486
+ // TODO:
487
+ // How does `impl ConsumeHunk for String` handle errors?
488
+ self . push_str ( std:: str:: from_utf8 ( content) . map_err ( |e| std:: io:: Error :: new ( ErrorKind :: Other , e) ) ?) ;
489
+ self . push ( '\n' ) ;
490
+ }
491
+ Ok ( ( ) )
492
+ }
493
+
494
+ fn finish ( self ) -> Self :: Out {
495
+ self
496
+ }
497
+ }
498
+
499
+ impl ConsumeTypedHunk for Vec < u8 > {
500
+ type Out = Self ;
501
+
502
+ fn consume_hunk (
503
+ & mut self ,
504
+ _: u32 ,
505
+ _: u32 ,
506
+ _: u32 ,
507
+ _: u32 ,
508
+ header : & str ,
509
+ lines : & [ ( DiffLineType , & [ u8 ] ) ] ,
510
+ ) -> std:: io:: Result < ( ) > {
511
+ self . push_str ( header) ;
512
+ for & ( line_type, content) in lines {
513
+ self . push ( line_type. to_byte_prefix ( ) ) ;
514
+ self . extend_from_slice ( content) ;
515
+ self . push ( b'\n' ) ;
516
+ }
517
+ Ok ( ( ) )
518
+ }
519
+
520
+ fn finish ( self ) -> Self :: Out {
521
+ self
522
+ }
523
+ }
320
524
}
0 commit comments