@@ -122,7 +122,7 @@ where
122122
123123 match decode_chunk ( data, start, true , allow_partial) ? {
124124 ( StringChunk :: StringEnd , ascii_only, index) => {
125- let s = to_str ( & data[ start..index] , ascii_only, start) ?;
125+ let s = to_str ( & data[ start..index] , ascii_only, start, allow_partial ) ?;
126126 Ok ( ( unsafe { StringOutput :: data ( s, ascii_only) } , index + 1 ) )
127127 }
128128 ( StringChunk :: Backslash , ascii_only, index) => {
@@ -162,7 +162,7 @@ fn decode_to_tape<'t, 'j>(
162162 }
163163 Err ( e) => {
164164 if allow_partial && e. error_type == JsonErrorType :: EofWhileParsingString {
165- let s = to_str ( tape, ascii_only, start) ?;
165+ let s = to_str ( tape, ascii_only, start, allow_partial ) ?;
166166 return Ok ( ( unsafe { StringOutput :: tape ( s, ascii_only) } , e. index ) ) ;
167167 }
168168 return Err ( e) ;
@@ -173,7 +173,7 @@ fn decode_to_tape<'t, 'j>(
173173 index += 1 ;
174174 } else {
175175 if allow_partial {
176- let s = to_str ( tape, ascii_only, start) ?;
176+ let s = to_str ( tape, ascii_only, start, allow_partial ) ?;
177177 return Ok ( ( unsafe { StringOutput :: tape ( s, ascii_only) } , index) ) ;
178178 }
179179 return json_err ! ( EofWhileParsingString , index) ;
@@ -183,7 +183,7 @@ fn decode_to_tape<'t, 'j>(
183183 ( StringChunk :: StringEnd , ascii_only, new_index) => {
184184 tape. extend_from_slice ( & data[ index..new_index] ) ;
185185 index = new_index + 1 ;
186- let s = to_str ( tape, ascii_only, start) ?;
186+ let s = to_str ( tape, ascii_only, start, allow_partial ) ?;
187187 return Ok ( ( unsafe { StringOutput :: tape ( s, ascii_only) } , index) ) ;
188188 }
189189 ( StringChunk :: Backslash , ascii_only_new, index_new) => {
@@ -344,13 +344,24 @@ static CHAR_TYPE: [CharType; 256] = {
344344 ]
345345} ;
346346
347- fn to_str ( bytes : & [ u8 ] , ascii_only : bool , start : usize ) -> JsonResult < & str > {
347+ fn to_str ( bytes : & [ u8 ] , ascii_only : bool , start : usize , allow_partial : bool ) -> JsonResult < & str > {
348348 if ascii_only {
349349 // safety: in this case we've already confirmed that all characters are ascii, we can safely
350350 // transmute from bytes to str
351351 Ok ( unsafe { from_utf8_unchecked ( bytes) } )
352352 } else {
353- from_utf8 ( bytes) . map_err ( |e| json_error ! ( InvalidUnicodeCodePoint , start + e. valid_up_to( ) + 1 ) )
353+ match from_utf8 ( bytes) {
354+ Ok ( s) => Ok ( s) ,
355+ Err ( e) if allow_partial && e. error_len ( ) . is_none ( ) => {
356+ // In partial mode, we handle incomplete (not invalid) UTF-8 sequences
357+ // by truncating to the last valid UTF-8 boundary
358+ // (`error_len()` is `None` for incomplete sequences)
359+ let valid_up_to = e. valid_up_to ( ) ;
360+ // SAFETY: `valid_up_to()` returns the byte index up to which the input is valid UTF-8
361+ Ok ( unsafe { from_utf8_unchecked ( & bytes[ ..valid_up_to] ) } )
362+ }
363+ Err ( e) => Err ( json_error ! ( InvalidUnicodeCodePoint , start + e. valid_up_to( ) + 1 ) ) ,
364+ }
354365 }
355366}
356367
0 commit comments