Skip to content

Commit 441d1ae

Browse files
samyron authored and byroot committed
Various small optimizations and cleanups in the parser.
1 parent 349b490 commit 441d1ae

File tree

1 file changed

+47
-50
lines changed

1 file changed

+47
-50
lines changed

ext/json/ext/parser/parser.c

Lines changed: 47 additions & 50 deletions
Original file line number | Diff line number | Diff line change
@@ -78,17 +78,51 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
7878
cache->entries[index] = rstring;
7979
}
8080

81-
static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
81+
/*
 * Decide whether the word-at-a-time comparison below can be used.
 *
 * The __has_builtin() test is deliberately nested inside its own #if: the
 * preprocessor parses an entire #if expression before evaluating it, so
 * writing `defined(__has_builtin) && __has_builtin(x)` on a single line is a
 * hard error on compilers that do not provide __has_builtin at all.
 */
#if defined(__BYTE_ORDER__) && defined(__ORDER_LITTLE_ENDIAN__) && defined(__has_builtin)
#  if (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && __has_builtin(__builtin_bswap64)
#    define JSON_RSTRING_CMP_USE_BSWAP64 1
#  endif
#endif

/*
 * Three-way comparison between the byte range [str, str + length) and the
 * contents of `rstring`, used to keep the rvalue cache sorted.
 *
 * Ordering: ranges of different length are ordered by length first; ranges
 * of equal length are ordered bytewise (bytes compared as unsigned char).
 *
 * Returns a negative value, zero, or a positive value. Only the sign is
 * meaningful.
 */
static ALWAYS_INLINE() int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
{
    const char *rptr;
    long rstring_length;

    RSTRING_GETMEM(rstring, rptr, rstring_length);

    if (length != rstring_length) {
        // Return an explicit sign instead of narrowing a `long` difference
        // to `int`, which could truncate to 0 or flip sign.
        return (length < rstring_length) ? -1 : 1;
    }

#ifdef JSON_RSTRING_CMP_USE_BSWAP64
    long i = 0;

    // Compare 8 bytes per iteration. memcpy() is used for the loads so that
    // no alignment or aliasing assumption is made about either buffer.
    for (; i + 8 <= length; i += 8) {
        uint64_t a, b;
        memcpy(&a, str + i, 8);
        memcpy(&b, rptr + i, 8);
        if (a != b) {
            // On a little-endian target the first byte in memory is the
            // least significant one; swapping makes it the most significant
            // so the integer comparison matches bytewise ordering.
            a = __builtin_bswap64(a);
            b = __builtin_bswap64(b);
            return (a < b) ? -1 : 1;
        }
    }

    // Remaining tail of fewer than 8 bytes.
    for (; i < length; i++) {
        unsigned char ca = (unsigned char)str[i];
        unsigned char cb = (unsigned char)rptr[i];
        if (ca != cb) {
            return (ca < cb) ? -1 : 1;
        }
    }

    return 0;
#else
    return memcmp(str, rptr, (size_t)length);
#endif
}
90124

91-
static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
125+
static ALWAYS_INLINE() VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
92126
{
93127
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
94128
// Common names aren't likely to be very long. So we just don't
@@ -105,37 +139,25 @@ static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const lon
105139

106140
int low = 0;
107141
int high = cache->length - 1;
108-
int mid = 0;
109-
int last_cmp = 0;
110142

111143
while (low <= high) {
112-
mid = (high + low) >> 1;
144+
int mid = (high + low) >> 1;
113145
VALUE entry = cache->entries[mid];
114-
last_cmp = rstring_cache_cmp(str, length, entry);
146+
int cmp = rstring_cache_cmp(str, length, entry);
115147

116-
if (last_cmp == 0) {
148+
if (cmp == 0) {
117149
return entry;
118-
} else if (last_cmp > 0) {
150+
} else if (cmp > 0) {
119151
low = mid + 1;
120152
} else {
121153
high = mid - 1;
122154
}
123155
}
124156

125-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
126-
// We assume the overwhelming majority of names don't need to be escaped.
127-
// But if they do, we have to fallback to the slow path.
128-
return Qfalse;
129-
}
130-
131157
VALUE rstring = build_interned_string(str, length);
132158

133159
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
134-
if (last_cmp > 0) {
135-
mid += 1;
136-
}
137-
138-
rvalue_cache_insert_at(cache, mid, rstring);
160+
rvalue_cache_insert_at(cache, low, rstring);
139161
}
140162
return rstring;
141163
}
@@ -157,37 +179,25 @@ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const lon
157179

158180
int low = 0;
159181
int high = cache->length - 1;
160-
int mid = 0;
161-
int last_cmp = 0;
162182

163183
while (low <= high) {
164-
mid = (high + low) >> 1;
184+
int mid = (high + low) >> 1;
165185
VALUE entry = cache->entries[mid];
166-
last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
186+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
167187

168-
if (last_cmp == 0) {
188+
if (cmp == 0) {
169189
return entry;
170-
} else if (last_cmp > 0) {
190+
} else if (cmp > 0) {
171191
low = mid + 1;
172192
} else {
173193
high = mid - 1;
174194
}
175195
}
176196

177-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
178-
// We assume the overwhelming majority of names don't need to be escaped.
179-
// But if they do, we have to fallback to the slow path.
180-
return Qfalse;
181-
}
182-
183197
VALUE rsymbol = build_symbol(str, length);
184198

185199
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
186-
if (last_cmp > 0) {
187-
mid += 1;
188-
}
189-
190-
rvalue_cache_insert_at(cache, mid, rsymbol);
200+
rvalue_cache_insert_at(cache, low, rsymbol);
191201
}
192202
return rsymbol;
193203
}
@@ -557,7 +567,7 @@ json_eat_comments(JSON_ParserState *state)
557567
}
558568
}
559569

560-
static inline void
570+
static ALWAYS_INLINE() void
561571
json_eat_whitespace(JSON_ParserState *state)
562572
{
563573
while (true) {
@@ -652,19 +662,6 @@ static VALUE json_string_unescape(JSON_ParserState *state, const char *string, c
652662
int unescape_len;
653663
char buf[4];
654664

655-
if (is_name && state->in_array) {
656-
VALUE cached_key;
657-
if (RB_UNLIKELY(symbolize)) {
658-
cached_key = rsymbol_cache_fetch(&state->name_cache, string, bufferSize);
659-
} else {
660-
cached_key = rstring_cache_fetch(&state->name_cache, string, bufferSize);
661-
}
662-
663-
if (RB_LIKELY(cached_key)) {
664-
return cached_key;
665-
}
666-
}
667-
668665
VALUE result = rb_str_buf_new(bufferSize);
669666
rb_enc_associate_index(result, utf8_encindex);
670667
buffer = RSTRING_PTR(result);

0 commit comments

Comments
 (0)