@@ -639,44 +639,41 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
639639static VALUE json_string_unescape (JSON_ParserState * state , const char * string , const char * stringEnd , bool is_name , bool intern , bool symbolize )
640640{
641641 size_t bufferSize = stringEnd - string ;
642- const char * p = string , * pe = string , * unescape , * bufferStart ;
642+ const char * p = string , * pe = string , * bufferStart ;
643643 char * buffer ;
644- int unescape_len ;
645- char buf [4 ];
646644
647645 VALUE result = rb_str_buf_new (bufferSize );
648646 rb_enc_associate_index (result , utf8_encindex );
649647 buffer = RSTRING_PTR (result );
650648 bufferStart = buffer ;
651649
650+ #define APPEND_CHAR (chr ) *buffer++ = chr; p = ++pe;
651+
652652 while (pe < stringEnd && (pe = memchr (pe , '\\' , stringEnd - pe ))) {
653- unescape = (char * ) "?" ;
654- unescape_len = 1 ;
655653 if (pe > p ) {
656654 MEMCPY (buffer , p , char , pe - p );
657655 buffer += pe - p ;
658656 }
659657 switch (* ++ pe ) {
658+ case '"' :
659+ case '/' :
660+ case '\\' :
661+ p = pe ; // nothing to unescape just need to skip the backslash
662+ break ;
660663 case 'n' :
661- unescape = ( char * ) "\n" ;
664+ APPEND_CHAR ( '\n' ) ;
662665 break ;
663666 case 'r' :
664- unescape = ( char * ) "\r" ;
667+ APPEND_CHAR ( '\r' ) ;
665668 break ;
666669 case 't' :
667- unescape = (char * ) "\t" ;
668- break ;
669- case '"' :
670- unescape = (char * ) "\"" ;
671- break ;
672- case '\\' :
673- unescape = (char * ) "\\" ;
670+ APPEND_CHAR ('\t' );
674671 break ;
675672 case 'b' :
676- unescape = ( char * ) "\b" ;
673+ APPEND_CHAR ( '\b' ) ;
677674 break ;
678675 case 'f' :
679- unescape = ( char * ) "\f" ;
676+ APPEND_CHAR ( '\f' ) ;
680677 break ;
681678 case 'u' :
682679 if (pe > stringEnd - 5 ) {
@@ -714,18 +711,20 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
714711 break ;
715712 }
716713 }
717- unescape_len = convert_UTF32_to_UTF8 (buf , ch );
718- unescape = buf ;
714+
715+ char buf [4 ];
716+ int unescape_len = convert_UTF32_to_UTF8 (buf , ch );
717+ MEMCPY (buffer , buf , char , unescape_len );
718+ buffer += unescape_len ;
719+ p = ++ pe ;
719720 }
720721 break ;
721722 default :
722- p = pe ;
723- continue ;
723+ UNREACHABLE_RETURN ( Qundef ) ;
724+ break ;
724725 }
725- MEMCPY (buffer , unescape , char , unescape_len );
726- buffer += unescape_len ;
727- p = ++ pe ;
728726 }
727+ #undef APPEND_CHAR
729728
730729 if (stringEnd > p ) {
731730 MEMCPY (buffer , p , char , stringEnd - p );
@@ -925,6 +924,18 @@ static const bool string_scan_table[256] = {
925924 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 , 0 ,
926925};
927926
927+ static const bool valid_escape_chars [256 ] = {
928+ ['\\' ] = true,
929+ ['"' ] = true,
930+ ['/' ] = true,
931+ ['b' ] = true,
932+ ['f' ] = true,
933+ ['n' ] = true,
934+ ['r' ] = true,
935+ ['t' ] = true,
936+ ['u' ] = true,
937+ };
938+
928939#ifdef HAVE_SIMD
929940static SIMD_Implementation simd_impl = SIMD_NONE ;
930941#endif /* HAVE_SIMD */
@@ -976,8 +987,12 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
976987 case '\\' : {
977988 state -> cursor ++ ;
978989 escaped = true;
979- if ((unsigned char )* state -> cursor < 0x20 ) {
980- raise_parse_error ("invalid ASCII control character in string: %s" , state );
990+
991+ if (RB_UNLIKELY (!valid_escape_chars [(unsigned char )* state -> cursor ])) {
992+ if ((unsigned char )* state -> cursor < 0x20 ) {
993+ raise_parse_error ("invalid ASCII control character in string: %s" , state );
994+ }
995+ raise_parse_error ("invalid escape character in string: %s" , state );
981996 }
982997 break ;
983998 }
0 commit comments