@@ -43,6 +43,7 @@ struct SerialContext<'ctx> {
4343 symbols : Option < Symbols < ' ctx > > ,
4444 previous_frag : Option < String > ,
4545 previous_line : Option < String > ,
46+ incomplete_utf8_buffer : Vec < u8 > ,
4647}
4748
4849impl < ' ctx > SerialContext < ' ctx > {
@@ -52,6 +53,40 @@ impl<'ctx> SerialContext<'ctx> {
5253 ..Self :: default ( )
5354 }
5455 }
56+
57+ fn process_utf8 ( & mut self , buff : & [ u8 ] ) -> String {
58+ let mut buffer = std:: mem:: take ( & mut self . incomplete_utf8_buffer ) ;
59+ buffer. extend ( normalized ( buff. iter ( ) . copied ( ) ) ) ;
60+
61+ // look for longest slice that we can then lossily convert without introducing errors for
62+ // partial sequences (#457)
63+ let mut len = 0 ;
64+
65+ loop {
66+ match std:: str:: from_utf8 ( & buffer[ len..] ) {
67+ // whole input is valid
68+ Ok ( str) if len == 0 => return String :: from ( str) ,
69+
70+ // input is valid after the last error, and we could ignore the last error, so
71+ // let's process the whole input
72+ Ok ( _) => return String :: from_utf8_lossy ( & buffer) . to_string ( ) ,
73+
74+ // input has some errors. We can ignore invalid sequences and replace them later,
75+ // but we have to stop if we encounter an incomplete sequence.
76+ Err ( e) => {
77+ len += e. valid_up_to ( ) ;
78+ if let Some ( error_len) = e. error_len ( ) {
79+ len += error_len;
80+ } else {
81+ // incomplete sequence. We split it off, save it for later
82+ let ( bytes, incomplete) = buffer. split_at ( len) ;
83+ self . incomplete_utf8_buffer = incomplete. to_vec ( ) ;
84+ return String :: from_utf8_lossy ( bytes) . to_string ( ) ;
85+ }
86+ }
87+ }
88+ }
89+ }
5590}
5691
5792/// Type that ensures that raw mode is disabled when dropped.
@@ -144,8 +179,7 @@ pub fn monitor(
144179
145180/// Handles and writes the received serial data to the given output stream.
146181fn handle_serial ( ctx : & mut SerialContext , buff : & [ u8 ] , out : & mut dyn Write ) {
147- let text: Vec < u8 > = normalized ( buff. iter ( ) . copied ( ) ) . collect ( ) ;
148- let text = String :: from_utf8_lossy ( & text) . to_string ( ) ;
182+ let text = ctx. process_utf8 ( buff) ;
149183
150184 // Split the text into lines, storing the last of which separately if it is
151185 // incomplete (ie. does not end with '\n') because these need special handling.
@@ -278,3 +312,75 @@ fn handle_key_event(key_event: KeyEvent) -> Option<Vec<u8>> {
278312
279313 key_str. map ( |slice| slice. into ( ) )
280314}
315+
#[cfg(test)]
mod test {
    use super::SerialContext;

    /// Builds `"Hello, "` followed by all but the last byte of `character`, i.e. a chunk that
    /// ends in the middle of a multi-byte UTF-8 sequence.
    fn hello_with_truncated(character: &str) -> Vec<u8> {
        let bytes = character.as_bytes();
        let mut chunk = Vec::from("Hello, ".as_bytes());
        chunk.extend_from_slice(&bytes[..bytes.len() - 1]);
        chunk
    }

    /// Fully valid input is returned in the same call, nothing is held back.
    #[test]
    fn returns_valid_strings_immediately() {
        let mut ctx = SerialContext::default();
        let buff = b"Hello, world!";
        let text = ctx.process_utf8(buff);
        assert_eq!(text, "Hello, world!");
    }

    /// Text returned by one call must not leak into the result of the next call.
    #[test]
    fn does_not_repeat_valid_strings() {
        let mut ctx = SerialContext::default();
        let text = ctx.process_utf8(b"Hello, world!");
        assert_eq!(text, "Hello, world!");
        let text = ctx.process_utf8(b"Something else");
        assert_eq!(text, "Something else");
    }

    /// A byte that can never be part of valid UTF-8 is replaced with U+FFFD in place.
    #[test]
    fn replaces_invalid_sequence() {
        let mut ctx = SerialContext::default();
        let text = ctx.process_utf8(b"Hello, \xFF world!");
        assert_eq!(text, "Hello, \u{FFFD} world!");
    }

    /// A truncated sequence is held back; if the next chunk does not complete it, it is
    /// replaced with a single U+FFFD in front of the new text.
    #[test]
    fn can_replace_unfinished_incomplete_sequence() {
        let mut ctx = SerialContext::default();
        let incomplete = hello_with_truncated("🙈");
        let text = ctx.process_utf8(&incomplete);
        assert_eq!(text, "Hello, ");

        let text = ctx.process_utf8(b" world!");
        assert_eq!(text, "\u{FFFD} world!");
    }

    /// A truncated sequence is held back and decoded correctly once its last byte arrives.
    #[test]
    fn can_merge_incomplete_sequence() {
        let mut ctx = SerialContext::default();
        let utf8 = "🙈".as_bytes();
        let incomplete = hello_with_truncated("🙈");

        let text = ctx.process_utf8(&incomplete);
        assert_eq!(text, "Hello, ");

        let text = ctx.process_utf8(&utf8[utf8.len() - 1..]);
        assert_eq!(text, "🙈");
    }

    /// Regression test for #457: text containing a two-byte character arrives in several
    /// small chunks and must be reassembled without replacement characters.
    #[test]
    fn issue_457() {
        let mut ctx = SerialContext::default();
        let chunks: [&[u8]; 5] = [
            &[0x48],
            &[0x65, 0x6C, 0x6C],
            &[
                0x6F, 0x20, 0x77, 0x6F, 0x72, 0x6C, 0x64, 0x21, 0x20, 0x77, 0x69, 0x74,
            ],
            &[
                0x68, 0x20, 0x55, 0x54, 0x46, 0x3A, 0x20, 0x77, 0x79, 0x73, 0x79,
            ],
            &[0xC5, 0x82, 0x61, 0x6D, 0x0A],
        ];

        let mut result = String::new();
        for chunk in chunks.iter() {
            result.push_str(&ctx.process_utf8(chunk));
        }

        // The final 0x0A is expected to come out as "\r\n" -- presumably the `normalized`
        // step expands bare line feeds (confirm against its definition).
        assert_eq!(result, "Hello world! with UTF: wysyłam\r\n");
    }
}
0 commit comments