@@ -43,6 +43,7 @@ struct SerialContext<'ctx> {
43
43
symbols : Option < Symbols < ' ctx > > ,
44
44
previous_frag : Option < String > ,
45
45
previous_line : Option < String > ,
46
+ incomplete_utf8_buffer : Vec < u8 > ,
46
47
}
47
48
48
49
impl < ' ctx > SerialContext < ' ctx > {
@@ -52,6 +53,40 @@ impl<'ctx> SerialContext<'ctx> {
52
53
..Self :: default ( )
53
54
}
54
55
}
56
+
57
+ fn process_utf8 ( & mut self , buff : & [ u8 ] ) -> String {
58
+ let mut buffer = std:: mem:: take ( & mut self . incomplete_utf8_buffer ) ;
59
+ buffer. extend ( normalized ( buff. iter ( ) . copied ( ) ) ) ;
60
+
61
+ // look for longest slice that we can then lossily convert without introducing errors for
62
+ // partial sequences (#457)
63
+ let mut len = 0 ;
64
+
65
+ loop {
66
+ match std:: str:: from_utf8 ( & buffer[ len..] ) {
67
+ // whole input is valid
68
+ Ok ( str) if len == 0 => return String :: from ( str) ,
69
+
70
+ // input is valid after the last error, and we could ignore the last error, so
71
+ // let's process the whole input
72
+ Ok ( _) => return String :: from_utf8_lossy ( & buffer) . to_string ( ) ,
73
+
74
+ // input has some errors. We can ignore invalid sequences and replace them later,
75
+ // but we have to stop if we encounter an incomplete sequence.
76
+ Err ( e) => {
77
+ len += e. valid_up_to ( ) ;
78
+ if let Some ( error_len) = e. error_len ( ) {
79
+ len += error_len;
80
+ } else {
81
+ // incomplete sequence. We split it off, save it for later
82
+ let ( bytes, incomplete) = buffer. split_at ( len) ;
83
+ self . incomplete_utf8_buffer = incomplete. to_vec ( ) ;
84
+ return String :: from_utf8_lossy ( bytes) . to_string ( ) ;
85
+ }
86
+ }
87
+ }
88
+ }
89
+ }
55
90
}
56
91
57
92
/// Type that ensures that raw mode is disabled when dropped.
@@ -144,8 +179,7 @@ pub fn monitor(
144
179
145
180
/// Handles and writes the received serial data to the given output stream.
146
181
fn handle_serial ( ctx : & mut SerialContext , buff : & [ u8 ] , out : & mut dyn Write ) {
147
- let text: Vec < u8 > = normalized ( buff. iter ( ) . copied ( ) ) . collect ( ) ;
148
- let text = String :: from_utf8_lossy ( & text) . to_string ( ) ;
182
+ let text = ctx. process_utf8 ( buff) ;
149
183
150
184
// Split the text into lines, storing the last of which separately if it is
151
185
// incomplete (ie. does not end with '\n') because these need special handling.
@@ -278,3 +312,75 @@ fn handle_key_event(key_event: KeyEvent) -> Option<Vec<u8>> {
278
312
279
313
key_str. map ( |slice| slice. into ( ) )
280
314
}
315
+
316
#[cfg(test)]
mod test {
    use super::SerialContext;

    /// Multi-byte character used to build truncated UTF-8 input.
    const EMOJI: &str = "🙈";

    /// Returns the bytes of `"Hello, "` followed by all but the last byte of
    /// [`EMOJI`], i.e. input that ends in an incomplete UTF-8 sequence.
    fn hello_with_truncated_emoji() -> Vec<u8> {
        let emoji = EMOJI.as_bytes();
        let mut bytes = Vec::from("Hello, ".as_bytes());
        bytes.extend_from_slice(&emoji[..emoji.len() - 1]);
        bytes
    }

    #[test]
    fn returns_valid_strings_immediately() {
        let mut ctx = SerialContext::default();
        let buff = b"Hello, world!";
        assert_eq!(ctx.process_utf8(buff), "Hello, world!");
    }

    #[test]
    fn does_not_repeat_valid_strings() {
        let mut ctx = SerialContext::default();
        assert_eq!(ctx.process_utf8(b"Hello, world!"), "Hello, world!");
        // Nothing from the first call may leak into the second one.
        assert_eq!(ctx.process_utf8(b"Something else"), "Something else");
    }

    #[test]
    fn replaces_invalid_sequence() {
        let mut ctx = SerialContext::default();
        let text = ctx.process_utf8(b"Hello, \xFF world!");
        assert_eq!(text, "Hello, \u{FFFD} world!");
    }

    #[test]
    fn can_replace_unfinished_incomplete_sequence() {
        let mut ctx = SerialContext::default();

        // The truncated character is held back on the first call...
        let text = ctx.process_utf8(&hello_with_truncated_emoji());
        assert_eq!(text, "Hello, ");

        // ...and replaced once the following bytes show it was never completed.
        let text = ctx.process_utf8(b" world!");
        assert_eq!(text, "\u{FFFD} world!");
    }

    #[test]
    fn can_merge_incomplete_sequence() {
        let mut ctx = SerialContext::default();
        let emoji = EMOJI.as_bytes();

        let text = ctx.process_utf8(&hello_with_truncated_emoji());
        assert_eq!(text, "Hello, ");

        // Supplying the missing final byte completes the character.
        let text = ctx.process_utf8(&emoji[emoji.len() - 1..]);
        assert_eq!(text, EMOJI);
    }

    #[test]
    fn issue_457() {
        let mut ctx = SerialContext::default();

        // The message arrives split across several reads.
        let chunks: [&[u8]; 5] = [
            &[0x48],
            &[0x65, 0x6C, 0x6C],
            &[
                0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x20, 0x77, 0x69, 0x74,
            ],
            &[
                0x68, 0x20, 0x55, 0x54, 0x46, 0x3A, 0x20, 0x77, 0x79, 0x73, 0x79,
            ],
            &[0xC5, 0x82, 0x61, 0x6D, 0x0A],
        ];

        let mut result = String::new();
        for chunk in chunks {
            result.push_str(&ctx.process_utf8(chunk));
        }

        // The input ends in a bare `\n` with no surrounding spaces; the expected
        // text ends in `\r\n` (the `\r` is presumably inserted by `normalized` —
        // confirm against its definition).
        assert_eq!(result, "Hello world! with UTF: wysyłam\r\n");
    }
}
0 commit comments