diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c
index e591ca2c5..01234b5a8 100644
--- a/ext/json/ext/parser/parser.c
+++ b/ext/json/ext/parser/parser.c
@@ -557,14 +557,6 @@ static uint32_t unescape_unicode(JSON_ParserState *state, const unsigned char *p
 
 static const rb_data_type_t JSON_ParserConfig_type;
 
-static const bool whitespace[256] = {
-    [' '] = 1,
-    ['\t'] = 1,
-    ['\n'] = 1,
-    ['\r'] = 1,
-    ['/'] = 1,
-};
-
 static void
 json_eat_comments(JSON_ParserState *state)
 {
@@ -607,12 +599,47 @@ json_eat_comments(JSON_ParserState *state)
 static inline void
 json_eat_whitespace(JSON_ParserState *state)
 {
-    unsigned char cursor;
-    while (RB_UNLIKELY(whitespace[cursor = (unsigned char)peek(state)])) {
-        if (RB_UNLIKELY(cursor == '/')) {
-            json_eat_comments(state);
-        } else {
-            state->cursor++;
+    // Advance past ' ', '\t', '\n', '\r' and hand '/' to json_eat_comments; return on any other byte.
+    while (true) {
+        switch (peek(state)) {
+            case ' ':
+                state->cursor++;
+                break;
+            case '\n':
+                state->cursor++;
+
+                // Heuristic: if we see a newline, there are likely consecutive spaces after it,
+                // so skip them eight bytes at a time.
+#if defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
+                while (rest(state) > 8) {
+                    uint64_t chunk;
+                    memcpy(&chunk, state->cursor, sizeof(uint64_t));
+
+                    // XOR zeroes every byte that is a space, so on little-endian the lowest
+                    // non-zero byte marks the first non-space character.
+                    uint64_t non_spaces = chunk ^ 0x2020202020202020;
+                    if (non_spaces == 0) {
+                        // All eight bytes are spaces. Do not bit-scan a zero word: the
+                        // result is undefined if trailing_zeros64 uses __builtin_ctzll.
+                        state->cursor += sizeof(uint64_t);
+                        continue;
+                    }
+
+                    state->cursor += trailing_zeros64(non_spaces) / CHAR_BIT;
+                    break;
+                }
+#endif
+                break;
+            case '\t':
+            case '\r':
+                state->cursor++;
+                break;
+            case '/':
+                json_eat_comments(state);
+                break;
+
+            default:
+                return;
         }
     }
 }
diff --git a/ext/json/ext/simd/simd.h b/ext/json/ext/simd/simd.h
index 3abbdb020..2aa6c3d04 100644
--- a/ext/json/ext/simd/simd.h
+++ b/ext/json/ext/simd/simd.h
@@ -4,8 +4,6 @@ typedef enum {
     SIMD_SSE2
 } SIMD_Implementation;
 
-#ifdef JSON_ENABLE_SIMD
-
 #ifdef __clang__
 # if __has_builtin(__builtin_ctzll)
 #   define HAVE_BUILTIN_CTZLL 1
@@ -54,6 +52,7 @@ static inline int trailing_zeros(int input)
 #define FORCE_INLINE
 #endif
 
+#ifdef JSON_ENABLE_SIMD
 
 #define SIMD_MINIMUM_THRESHOLD 6
 