@@ -639,44 +639,43 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
639639static VALUE json_string_unescape (JSON_ParserState * state , const char * string , const char * stringEnd , bool is_name , bool intern , bool symbolize )
640640{
641641 size_t bufferSize = stringEnd - string ;
642- const char * p = string , * pe = string , * unescape , * bufferStart ;
642+ const char * p = string , * pe = string , * bufferStart ;
643643 char * buffer ;
644- int unescape_len ;
645- char buf [4 ];
646644
647645 VALUE result = rb_str_buf_new (bufferSize );
648646 rb_enc_associate_index (result , utf8_encindex );
649647 buffer = RSTRING_PTR (result );
650648 bufferStart = buffer ;
651649
650+ #define APPEND_CHAR (chr ) *buffer++ = chr; p = ++pe;
651+
652652 while (pe < stringEnd && (pe = memchr (pe , '\\' , stringEnd - pe ))) {
653- unescape = (char * ) "?" ;
654- unescape_len = 1 ;
655653 if (pe > p ) {
656654 MEMCPY (buffer , p , char , pe - p );
657655 buffer += pe - p ;
658656 }
659657 switch (* ++ pe ) {
658+ case '"' :
659+ case '/' :
660+ p = pe ; // nothing to unescape just need to skip the backslash
661+ break ;
662+ case '\\' :
663+ APPEND_CHAR ('\\' );
664+ break ;
660665 case 'n' :
661- unescape = ( char * ) "\n" ;
666+ APPEND_CHAR ( '\n' ) ;
662667 break ;
663668 case 'r' :
664- unescape = ( char * ) "\r" ;
669+ APPEND_CHAR ( '\r' ) ;
665670 break ;
666671 case 't' :
667- unescape = (char * ) "\t" ;
668- break ;
669- case '"' :
670- unescape = (char * ) "\"" ;
671- break ;
672- case '\\' :
673- unescape = (char * ) "\\" ;
672+ APPEND_CHAR ('\t' );
674673 break ;
675674 case 'b' :
676- unescape = ( char * ) "\b" ;
675+ APPEND_CHAR ( '\b' ) ;
677676 break ;
678677 case 'f' :
679- unescape = ( char * ) "\f" ;
678+ APPEND_CHAR ( '\f' ) ;
680679 break ;
681680 case 'u' :
682681 if (pe > stringEnd - 5 ) {
@@ -714,18 +713,20 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
714713 break ;
715714 }
716715 }
717- unescape_len = convert_UTF32_to_UTF8 (buf , ch );
718- unescape = buf ;
716+
717+ char buf [4 ];
718+ int unescape_len = convert_UTF32_to_UTF8 (buf , ch );
719+ MEMCPY (buffer , buf , char , unescape_len );
720+ buffer += unescape_len ;
721+ p = ++ pe ;
719722 }
720723 break ;
721724 default :
722- p = pe ;
723- continue ;
725+ UNREACHABLE_RETURN ( Qundef ) ;
726+ break ;
724727 }
725- MEMCPY (buffer , unescape , char , unescape_len );
726- buffer += unescape_len ;
727- p = ++ pe ;
728728 }
729+ #undef APPEND_CHAR
729730
730731 if (stringEnd > p ) {
731732 MEMCPY (buffer , p , char , stringEnd - p );
@@ -925,6 +926,18 @@ static const bool string_scan_table[256] = {
925926 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
926927};
927928
929+ static const bool valid_escape_chars [256 ] = {
930+ ['\\' ] = true,
931+ ['"' ] = true,
932+ ['/' ] = true,
933+ ['b' ] = true,
934+ ['f' ] = true,
935+ ['n' ] = true,
936+ ['r' ] = true,
937+ ['t' ] = true,
938+ ['u' ] = true,
939+ };
940+
928941#ifdef HAVE_SIMD
929942static SIMD_Implementation simd_impl = SIMD_NONE ;
930943#endif /* HAVE_SIMD */
@@ -976,8 +989,12 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
976989 case '\\' : {
977990 state -> cursor ++ ;
978991 escaped = true;
979- if ((unsigned char )* state -> cursor < 0x20 ) {
980- raise_parse_error ("invalid ASCII control character in string: %s" , state );
992+
993+ if (RB_UNLIKELY (!valid_escape_chars [(unsigned char )* state -> cursor ])) {
994+ if ((unsigned char )* state -> cursor < 0x20 ) {
995+ raise_parse_error ("invalid ASCII control character in string: %s" , state );
996+ }
997+ raise_parse_error ("invalid escape character in string: %s" , state );
981998 }
982999 break ;
9831000 }
0 commit comments