@@ -616,8 +616,10 @@ static inline bool json_string_cacheable_p(const char *string, size_t length)
616616 return length <= JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH && rb_isalpha (string [0 ]);
617617}
618618
619- static inline VALUE json_string_fastpath (JSON_ParserState * state , const char * string , const char * stringEnd , bool is_name , bool intern , bool symbolize )
619+ static inline VALUE json_string_fastpath (JSON_ParserState * state , JSON_ParserConfig * config , const char * string , const char * stringEnd , bool is_name )
620620{
621+ bool intern = is_name || config -> freeze ;
622+ bool symbolize = is_name && config -> symbolize_names ;
621623 size_t bufferSize = stringEnd - string ;
622624
623625 if (is_name && state -> in_array && RB_LIKELY (json_string_cacheable_p (string , bufferSize ))) {
@@ -636,8 +638,33 @@ static inline VALUE json_string_fastpath(JSON_ParserState *state, const char *st
636638 return build_string (string , stringEnd , intern , symbolize );
637639}
638640
639- static VALUE json_string_unescape (JSON_ParserState * state , const char * string , const char * stringEnd , bool is_name , bool intern , bool symbolize )
/* Maximum number of backslash positions remembered while scanning a string. */
#define JSON_MAX_UNESCAPE_POSITIONS 16

/*
 * Queue of backslash positions recorded while scanning a string.
 *
 *   size      - number of recorded positions not yet consumed.
 *   positions - points at the next recorded position to hand out.
 *   has_more  - true when the scanner saw more backslashes than it could
 *               record; once the queue is drained the remaining ones must be
 *               located by searching the string.
 */
typedef struct _json_unescape_positions {
    long size;
    const char **positions;
    bool has_more;
} JSON_UnescapePositions;

/*
 * Return the next backslash located at or after `pe`, or NULL when there is
 * none left before `stringEnd`.
 *
 * Recorded positions are consumed first. A recorded position that lies before
 * `pe` is stale: the caller has already advanced past it (NOTE(review):
 * presumably this happens when a single unescape step consumes two escapes,
 * e.g. both halves of a \uXXXX surrogate pair -- confirm against
 * json_string_unescape). Stale entries are dropped rather than returned, so
 * the caller never moves backwards and never decodes the same escape twice.
 *
 * When the queue is empty and `has_more` is set, fall back to memchr().
 */
static inline const char *json_next_backslash(const char *pe, const char *stringEnd, JSON_UnescapePositions *positions)
{
    while (positions->size) {
        positions->size--;
        const char *next_position = positions->positions[0];
        positions->positions++;
        if (next_position >= pe) {
            return next_position;
        }
        /* Stale entry: already consumed by the caller, keep draining. */
    }

    if (positions->has_more) {
        return memchr(pe, '\\', stringEnd - pe);
    }

    return NULL;
}
663+
664+ static NOINLINE () VALUE json_string_unescape (JSON_ParserState * state , JSON_ParserConfig * config , const char * string , const char * stringEnd , bool is_name , JSON_UnescapePositions * positions )
640665{
666+ bool intern = is_name || config -> freeze ;
667+ bool symbolize = is_name && config -> symbolize_names ;
641668 size_t bufferSize = stringEnd - string ;
642669 const char * p = string , * pe = string , * bufferStart ;
643670 char * buffer ;
@@ -649,7 +676,7 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
649676
650677#define APPEND_CHAR (chr ) *buffer++ = chr; p = ++pe;
651678
652- while (pe < stringEnd && (pe = memchr (pe , '\\' , stringEnd - pe ))) {
679+ while (pe < stringEnd && (pe = json_next_backslash (pe , stringEnd , positions ))) {
653680 if (pe > p ) {
654681 MEMCPY (buffer , p , char , pe - p );
655682 buffer += pe - p ;
@@ -893,20 +920,6 @@ static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfi
893920 return object ;
894921}
895922
896- static inline VALUE json_decode_string (JSON_ParserState * state , JSON_ParserConfig * config , const char * start , const char * end , bool escaped , bool is_name )
897- {
898- VALUE string ;
899- bool intern = is_name || config -> freeze ;
900- bool symbolize = is_name && config -> symbolize_names ;
901- if (escaped ) {
902- string = json_string_unescape (state , start , end , is_name , intern , symbolize );
903- } else {
904- string = json_string_fastpath (state , start , end , is_name , intern , symbolize );
905- }
906-
907- return string ;
908- }
909-
910923static inline VALUE json_push_value (JSON_ParserState * state , JSON_ParserConfig * config , VALUE value )
911924{
912925 if (RB_UNLIKELY (config -> on_load_proc )) {
@@ -964,22 +977,30 @@ static ALWAYS_INLINE() bool string_scan(JSON_ParserState *state)
964977 return false;
965978}
966979
967- static inline VALUE json_parse_string (JSON_ParserState * state , JSON_ParserConfig * config , bool is_name )
980+ static VALUE json_parse_escaped_string (JSON_ParserState * state , JSON_ParserConfig * config , bool is_name , const char * start )
968981{
969- state -> cursor ++ ;
970- const char * start = state -> cursor ;
971- bool escaped = false;
982+ const char * backslashes [JSON_MAX_UNESCAPE_POSITIONS ];
983+ JSON_UnescapePositions positions = {
984+ .size = 0 ,
985+ .positions = backslashes ,
986+ .has_more = false,
987+ };
972988
973- while ( RB_UNLIKELY ( string_scan ( state ))) {
989+ do {
974990 switch (* state -> cursor ) {
975991 case '"' : {
976- VALUE string = json_decode_string (state , config , start , state -> cursor , escaped , is_name );
992+ VALUE string = json_string_unescape (state , config , start , state -> cursor , is_name , & positions );
977993 state -> cursor ++ ;
978994 return json_push_value (state , config , string );
979995 }
980996 case '\\' : {
997+ if (RB_LIKELY (positions .size < JSON_MAX_UNESCAPE_POSITIONS )) {
998+ backslashes [positions .size ] = state -> cursor ;
999+ positions .size ++ ;
1000+ } else {
1001+ positions .has_more = true;
1002+ }
9811003 state -> cursor ++ ;
982- escaped = true;
9831004 break ;
9841005 }
9851006 default :
@@ -988,12 +1009,29 @@ static inline VALUE json_parse_string(JSON_ParserState *state, JSON_ParserConfig
9881009 }
9891010
9901011 state -> cursor ++ ;
991- }
1012+ } while ( string_scan ( state ));
9921013
9931014 raise_parse_error ("unexpected end of input, expected closing \"" , state );
9941015 return Qfalse ;
9951016}
9961017
1018+ static ALWAYS_INLINE () VALUE json_parse_string (JSON_ParserState * state , JSON_ParserConfig * config , bool is_name )
1019+ {
1020+ state -> cursor ++ ;
1021+ const char * start = state -> cursor ;
1022+
1023+ if (RB_UNLIKELY (!string_scan (state ))) {
1024+ raise_parse_error ("unexpected end of input, expected closing \"" , state );
1025+ }
1026+
1027+ if (RB_LIKELY (* state -> cursor == '"' )) {
1028+ VALUE string = json_string_fastpath (state , config , start , state -> cursor , is_name );
1029+ state -> cursor ++ ;
1030+ return json_push_value (state , config , string );
1031+ }
1032+ return json_parse_escaped_string (state , config , is_name , start );
1033+ }
1034+
9971035#if JSON_CPU_LITTLE_ENDIAN_64BITS
9981036// From: https://lemire.me/blog/2022/01/21/swar-explained-parsing-eight-digits/
9991037// Additional References:
0 commit comments