Skip to content

Commit 5bcc397

Browse files
committed
parser.c: Optimize json_parse_digits
We can use the `ctz` builtin to get the number of consecutive digits.

```
== Parsing float parsing (2251051 bytes)
ruby 3.4.6 (2025-09-16 revision dbd83256b1) +YJIT +PRISM [arm64-darwin24]
Warming up --------------------------------------
               after    25.000 i/100ms
Calculating -------------------------------------
               after    231.963 (± 0.0%) i/s (4.31 ms/i) - 1.175k in 5.065467s

Comparison:
              before:    215.3 i/s
               after:    232.0 i/s - 1.08x faster
```
1 parent ded62a5 commit 5bcc397

File tree

4 files changed

+36
-8
lines changed

4 files changed

+36
-8
lines changed

ext/json/ext/parser/extconf.rb

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,6 +1,8 @@
11
# frozen_string_literal: true
22
require 'mkmf'
33

4+
$defs << "-DJSON_DEBUG" if ENV["JSON_DEBUG"]
5+
46
have_func("rb_enc_interned_str", "ruby/encoding.h") # RUBY_VERSION >= 3.0
57
have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2
68
have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby

ext/json/ext/parser/parser.c

Lines changed: 18 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -612,12 +612,14 @@ json_eat_whitespace(JSON_ParserState *state)
612612
while (rest(state) > 8) {
613613
uint64_t chunk;
614614
memcpy(&chunk, state->cursor, sizeof(uint64_t));
615-
size_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
615+
if (chunk == 0x2020202020202020) {
616+
state->cursor += 8;
617+
continue;
618+
}
616619

620+
uint32_t consecutive_spaces = trailing_zeros64(chunk ^ 0x2020202020202020) / CHAR_BIT;
617621
state->cursor += consecutive_spaces;
618-
if (consecutive_spaces != 8) {
619-
break;
620-
}
622+
break;
621623
}
622624
#endif
623625
break;
@@ -1095,19 +1097,27 @@ static inline int json_parse_digits(JSON_ParserState *state, uint64_t *accumulat
10951097
// Branchless version of: http://0x80.pl/articles/swar-digits-validate.html
10961098
uint64_t match = (next_8bytes & 0xF0F0F0F0F0F0F0F0) | (((next_8bytes + 0x0606060606060606) & 0xF0F0F0F0F0F0F0F0) >> 4);
10971099

1098-
if (match == 0x3333333333333333) { // 8 consecutive digits
1100+
if (match == 0x3333333333333333) { // 8 consecutive digits
10991101
*accumulator = (*accumulator * 100000000) + decode_8digits_unrolled(next_8bytes);
11001102
state->cursor += 8;
11011103
continue;
11021104
}
11031105

1104-
if ((match & 0xFFFFFFFF) == 0x33333333) { // 4 consecutive digits
1106+
uint32_t consecutive_digits = trailing_zeros64(match ^ 0x3333333333333333) / CHAR_BIT;
1107+
1108+
if (consecutive_digits >= 4) {
11051109
*accumulator = (*accumulator * 10000) + decode_4digits_unrolled((uint32_t)next_8bytes);
11061110
state->cursor += 4;
1107-
break;
1111+
consecutive_digits -= 4;
1112+
}
1113+
1114+
while (consecutive_digits) {
1115+
*accumulator = *accumulator * 10 + (*state->cursor - '0');
1116+
consecutive_digits--;
1117+
state->cursor++;
11081118
}
11091119

1110-
break;
1120+
return (int)(state->cursor - start);
11111121
}
11121122
#endif
11131123

ext/json/ext/simd/simd.h

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,7 @@
1+
#ifdef JSON_DEBUG
2+
#include <assert.h>
3+
#endif
4+
15
typedef enum {
26
SIMD_NONE,
37
SIMD_NEON,
@@ -18,6 +22,10 @@ typedef enum {
1822

1923
static inline uint32_t trailing_zeros64(uint64_t input)
2024
{
25+
#ifdef JSON_DEBUG
26+
assert(input > 0); // __builtin_ctz(0) is undefined behavior
27+
#endif
28+
2129
#if HAVE_BUILTIN_CTZLL
2230
return __builtin_ctzll(input);
2331
#else
@@ -33,6 +41,10 @@ static inline uint32_t trailing_zeros64(uint64_t input)
3341

3442
static inline int trailing_zeros(int input)
3543
{
44+
#ifdef JSON_DEBUG
45+
assert(input > 0); // __builtin_ctz(0) is undefined behavior
46+
#endif
47+
3648
#if HAVE_BUILTIN_CTZLL
3749
return __builtin_ctz(input);
3850
#else

test/json/json_parser_test.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -804,6 +804,10 @@ def test_parse_leading_slash
804804
end
805805
end
806806

807+
def test_parse_whitespace_after_newline
808+
assert_equal [], JSON.parse("[\n#{' ' * (8 + 8 + 4 + 3)}]")
809+
end
810+
807811
private
808812

809813
def assert_equal_float(expected, actual, delta = 1e-2)

0 commit comments

Comments
 (0)