@@ -592,96 +592,87 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
592592 }
593593 }
594594
595- pe = memchr (p , '\\' , bufferSize );
596- if (RB_UNLIKELY (pe == NULL )) {
597- return build_string (string , stringEnd , intern , symbolize );
598- }
599-
600595 VALUE result = rb_str_buf_new (bufferSize );
601596 rb_enc_associate_index (result , utf8_encindex );
602597 buffer = RSTRING_PTR (result );
603598 bufferStart = buffer ;
604599
605- while (pe < stringEnd ) {
606- if (* pe == '\\' ) {
607- unescape = (char * ) "?" ;
608- unescape_len = 1 ;
609- if (pe > p ) {
610- MEMCPY (buffer , p , char , pe - p );
611- buffer += pe - p ;
612- }
613- switch (* ++ pe ) {
614- case 'n' :
615- unescape = (char * ) "\n" ;
616- break ;
617- case 'r' :
618- unescape = (char * ) "\r" ;
619- break ;
620- case 't' :
621- unescape = (char * ) "\t" ;
622- break ;
623- case '"' :
624- unescape = (char * ) "\"" ;
625- break ;
626- case '\\' :
627- unescape = (char * ) "\\" ;
628- break ;
629- case 'b' :
630- unescape = (char * ) "\b" ;
631- break ;
632- case 'f' :
633- unescape = (char * ) "\f" ;
634- break ;
635- case 'u' :
636- if (pe > stringEnd - 4 ) {
637- raise_parse_error ("incomplete unicode character escape sequence at '%s'" , p );
638- } else {
639- uint32_t ch = unescape_unicode ((unsigned char * ) ++ pe );
640- pe += 3 ;
641- /* To handle values above U+FFFF, we take a sequence of
642- * \uXXXX escapes in the U+D800..U+DBFF then
643- * U+DC00..U+DFFF ranges, take the low 10 bits from each
644- * to make a 20-bit number, then add 0x10000 to get the
645- * final codepoint.
646- *
647- * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
648- * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
649- * Area".
650- */
651- if ((ch & 0xFC00 ) == 0xD800 ) {
652- pe ++ ;
653- if (pe > stringEnd - 6 ) {
654- raise_parse_error ("incomplete surrogate pair at '%s'" , p );
655- }
656- if (pe [0 ] == '\\' && pe [1 ] == 'u' ) {
657- uint32_t sur = unescape_unicode ((unsigned char * ) pe + 2 );
658- ch = (((ch & 0x3F ) << 10 ) | ((((ch >> 6 ) & 0xF ) + 1 ) << 16 )
659- | (sur & 0x3FF ));
660- pe += 5 ;
661- } else {
662- unescape = (char * ) "?" ;
663- break ;
664- }
600+ while ((pe = memchr (pe , '\\' , stringEnd - pe ))) {
601+ unescape = (char * ) "?" ;
602+ unescape_len = 1 ;
603+ if (pe > p ) {
604+ MEMCPY (buffer , p , char , pe - p );
605+ buffer += pe - p ;
606+ }
607+ switch (* ++ pe ) {
608+ case 'n' :
609+ unescape = (char * ) "\n" ;
610+ break ;
611+ case 'r' :
612+ unescape = (char * ) "\r" ;
613+ break ;
614+ case 't' :
615+ unescape = (char * ) "\t" ;
616+ break ;
617+ case '"' :
618+ unescape = (char * ) "\"" ;
619+ break ;
620+ case '\\' :
621+ unescape = (char * ) "\\" ;
622+ break ;
623+ case 'b' :
624+ unescape = (char * ) "\b" ;
625+ break ;
626+ case 'f' :
627+ unescape = (char * ) "\f" ;
628+ break ;
629+ case 'u' :
630+ if (pe > stringEnd - 4 ) {
631+ raise_parse_error ("incomplete unicode character escape sequence at '%s'" , p );
632+ } else {
633+ uint32_t ch = unescape_unicode ((unsigned char * ) ++ pe );
634+ pe += 3 ;
635+ /* To handle values above U+FFFF, we take a sequence of
636+ * \uXXXX escapes in the U+D800..U+DBFF then
637+ * U+DC00..U+DFFF ranges, take the low 10 bits from each
638+ * to make a 20-bit number, then add 0x10000 to get the
639+ * final codepoint.
640+ *
641+ * See Unicode 15: 3.8 "Surrogates", 5.3 "Handling
642+ * Surrogate Pairs in UTF-16", and 23.6 "Surrogates
643+ * Area".
644+ */
645+ if ((ch & 0xFC00 ) == 0xD800 ) {
646+ pe ++ ;
647+ if (pe > stringEnd - 6 ) {
648+ raise_parse_error ("incomplete surrogate pair at '%s'" , p );
649+ }
650+ if (pe [0 ] == '\\' && pe [1 ] == 'u' ) {
651+ uint32_t sur = unescape_unicode ((unsigned char * ) pe + 2 );
652+ ch = (((ch & 0x3F ) << 10 ) | ((((ch >> 6 ) & 0xF ) + 1 ) << 16 )
653+ | (sur & 0x3FF ));
654+ pe += 5 ;
655+ } else {
656+ unescape = (char * ) "?" ;
657+ break ;
665658 }
666- unescape_len = convert_UTF32_to_UTF8 (buf , ch );
667- unescape = buf ;
668659 }
669- break ;
670- default :
671- p = pe ;
672- continue ;
673- }
674- MEMCPY (buffer , unescape , char , unescape_len );
675- buffer += unescape_len ;
676- p = ++ pe ;
677- } else {
678- pe ++ ;
660+ unescape_len = convert_UTF32_to_UTF8 (buf , ch );
661+ unescape = buf ;
662+ }
663+ break ;
664+ default :
665+ p = pe ;
666+ continue ;
679667 }
668+ MEMCPY (buffer , unescape , char , unescape_len );
669+ buffer += unescape_len ;
670+ p = ++ pe ;
680671 }
681672
682- if (pe > p ) {
683- MEMCPY (buffer , p , char , pe - p );
684- buffer += pe - p ;
673+ if (stringEnd > p ) {
674+ MEMCPY (buffer , p , char , stringEnd - p );
675+ buffer += stringEnd - p ;
685676 }
686677 rb_str_set_len (result , buffer - bufferStart );
687678
0 commit comments