Skip to content

Commit c92bd6a

Browse files
committed
Various small optimizations and cleanups in the parser.
1 parent f7cd41c commit c92bd6a

File tree

2 files changed

+54
-43
lines changed

2 files changed

+54
-43
lines changed

Gemfile

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -5,6 +5,7 @@ gemspec
55
group :development do
66
gem "ruby_memcheck" if RUBY_PLATFORM =~ /linux/i
77
gem "ostruct"
8+
gem 'power_assert', '< 3.0.0'
89
gem "rake"
910
gem "rake-compiler"
1011
gem "test-unit"

ext/json/ext/parser/parser.c

Lines changed: 53 additions & 43 deletions
Original file line number | Diff line number | Diff line change
@@ -95,6 +95,12 @@ static rb_encoding *enc_utf8;
9595

9696
#define JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH 55
9797

98+
#if (defined(__GNUC__ ) || defined(__clang__))
99+
#define FORCE_INLINE __attribute__((always_inline))
100+
#else
101+
#define FORCE_INLINE
102+
#endif
103+
98104
static inline VALUE build_interned_string(const char *str, const long length)
99105
{
100106
# ifdef HAVE_RB_ENC_INTERNED_STR
@@ -117,17 +123,51 @@ static void rvalue_cache_insert_at(rvalue_cache *cache, int index, VALUE rstring
117123
cache->entries[index] = rstring;
118124
}
119125

120-
static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
126+
static inline FORCE_INLINE int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
121127
{
128+
#if defined(__BYTE_ORDER__) && (__BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__) && defined(__has_builtin) && __has_builtin(__builtin_bswap64)
129+
const char *rptr;
130+
long rstring_length;
131+
132+
RSTRING_GETMEM(rstring, rptr, rstring_length);
133+
134+
if (length != rstring_length) {
135+
return (int)(length - rstring_length);
136+
}
137+
138+
long i = 0;
139+
140+
for (; i+8 <= length; i += 8) {
141+
uint64_t a, b;
142+
memcpy(&a, str + i, 8);
143+
memcpy(&b, rptr + i, 8);
144+
if (a != b) {
145+
a = __builtin_bswap64(a);
146+
b = __builtin_bswap64(b);
147+
return (a < b) ? -1 : 1;
148+
}
149+
}
150+
151+
for (; i < length; i++) {
152+
unsigned char ca = (unsigned char)str[i];
153+
unsigned char cb = (unsigned char)rptr[i];
154+
if (ca != cb) {
155+
return (ca < cb) ? -1 : 1;
156+
}
157+
}
158+
159+
return 0;
160+
#else
122161
long rstring_length = RSTRING_LEN(rstring);
123162
if (length == rstring_length) {
124163
return memcmp(str, RSTRING_PTR(rstring), length);
125164
} else {
126165
return (int)(length - rstring_length);
127166
}
167+
#endif
128168
}
129169

130-
static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
170+
static inline FORCE_INLINE VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const long length)
131171
{
132172
if (RB_UNLIKELY(length > JSON_RVALUE_CACHE_MAX_ENTRY_LENGTH)) {
133173
// Common names aren't likely to be very long. So we just don't
@@ -144,37 +184,25 @@ static VALUE rstring_cache_fetch(rvalue_cache *cache, const char *str, const lon
144184

145185
int low = 0;
146186
int high = cache->length - 1;
147-
int mid = 0;
148-
int last_cmp = 0;
149187

150188
while (low <= high) {
151-
mid = (high + low) >> 1;
189+
int mid = (high + low) >> 1;
152190
VALUE entry = cache->entries[mid];
153-
last_cmp = rstring_cache_cmp(str, length, entry);
191+
int cmp = rstring_cache_cmp(str, length, entry);
154192

155-
if (last_cmp == 0) {
193+
if (cmp == 0) {
156194
return entry;
157-
} else if (last_cmp > 0) {
195+
} else if (cmp > 0) {
158196
low = mid + 1;
159197
} else {
160198
high = mid - 1;
161199
}
162200
}
163201

164-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
165-
// We assume the overwhelming majority of names don't need to be escaped.
166-
// But if they do, we have to fallback to the slow path.
167-
return Qfalse;
168-
}
169-
170202
VALUE rstring = build_interned_string(str, length);
171203

172204
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
173-
if (last_cmp > 0) {
174-
mid += 1;
175-
}
176-
177-
rvalue_cache_insert_at(cache, mid, rstring);
205+
rvalue_cache_insert_at(cache, low, rstring);
178206
}
179207
return rstring;
180208
}
@@ -196,37 +224,25 @@ static VALUE rsymbol_cache_fetch(rvalue_cache *cache, const char *str, const lon
196224

197225
int low = 0;
198226
int high = cache->length - 1;
199-
int mid = 0;
200-
int last_cmp = 0;
201227

202228
while (low <= high) {
203-
mid = (high + low) >> 1;
229+
int mid = (high + low) >> 1;
204230
VALUE entry = cache->entries[mid];
205-
last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
231+
int cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
206232

207-
if (last_cmp == 0) {
233+
if (cmp == 0) {
208234
return entry;
209-
} else if (last_cmp > 0) {
235+
} else if (cmp > 0) {
210236
low = mid + 1;
211237
} else {
212238
high = mid - 1;
213239
}
214240
}
215241

216-
if (RB_UNLIKELY(memchr(str, '\\', length))) {
217-
// We assume the overwhelming majority of names don't need to be escaped.
218-
// But if they do, we have to fallback to the slow path.
219-
return Qfalse;
220-
}
221-
222242
VALUE rsymbol = build_symbol(str, length);
223243

224244
if (cache->length < JSON_RVALUE_CACHE_CAPA) {
225-
if (last_cmp > 0) {
226-
mid += 1;
227-
}
228-
229-
rvalue_cache_insert_at(cache, mid, rsymbol);
245+
rvalue_cache_insert_at(cache, low, rsymbol);
230246
}
231247
return rsymbol;
232248
}
@@ -596,7 +612,7 @@ json_eat_comments(JSON_ParserState *state)
596612
}
597613
}
598614

599-
static inline void
615+
static inline FORCE_INLINE void
600616
json_eat_whitespace(JSON_ParserState *state)
601617
{
602618
while (true) {
@@ -985,12 +1001,6 @@ static const bool string_scan_table[256] = {
9851001
0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
9861002
};
9871003

988-
#if (defined(__GNUC__ ) || defined(__clang__))
989-
#define FORCE_INLINE __attribute__((always_inline))
990-
#else
991-
#define FORCE_INLINE
992-
#endif
993-
9941004
#ifdef HAVE_SIMD
9951005
static SIMD_Implementation simd_impl = SIMD_NONE;
9961006
#endif /* HAVE_SIMD */

0 commit comments

Comments
 (0)