Skip to content

Commit aae442d

Browse files
authored
Merge pull request #816 from samyron/neon-simd-parser
Optimize 'json_parse_string' using ARM Neon.
2 parents 6c41162 + 3ae3eeb commit aae442d

File tree

11 files changed

+376
-216
lines changed

11 files changed

+376
-216
lines changed

ext/json/ext/generator/depend

Lines changed: 1 addition & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -1,4 +1,4 @@
11
generator.o: generator.c $(srcdir)/../fbuffer/fbuffer.h
22
generator.o: generator.c $(srcdir)/../vendor/fpconv.c
33
generator.o: generator.c $(srcdir)/../vendor/jeaiii-ltoa.h
4-
generator.o: generator.c $(srcdir)/simd.h
4+
generator.o: generator.c $(srcdir)/../simd/simd.h

ext/json/ext/generator/extconf.rb

Lines changed: 1 addition & 25 deletions
Original file line number | Diff line number | Diff line change
@@ -9,31 +9,7 @@
99
$defs << "-DJSON_DEBUG" if ENV["JSON_DEBUG"]
1010

1111
if enable_config('generator-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
12-
if RbConfig::CONFIG['host_cpu'] =~ /^(arm.*|aarch64.*)/
13-
# Try to compile a small program using NEON instructions
14-
if have_header('arm_neon.h')
15-
have_type('uint8x16_t', headers=['arm_neon.h']) && try_compile(<<~'SRC')
16-
#include <arm_neon.h>
17-
int main() {
18-
uint8x16_t test = vdupq_n_u8(32);
19-
return 0;
20-
}
21-
SRC
22-
$defs.push("-DJSON_ENABLE_SIMD")
23-
end
24-
end
25-
26-
if have_header('x86intrin.h') && have_type('__m128i', headers=['x86intrin.h']) && try_compile(<<~'SRC')
27-
#include <x86intrin.h>
28-
int main() {
29-
__m128i test = _mm_set1_epi8(32);
30-
return 0;
31-
}
32-
SRC
33-
$defs.push("-DJSON_ENABLE_SIMD")
34-
end
35-
36-
have_header('cpuid.h')
12+
require_relative "../simd/conf.rb"
3713
end
3814

3915
create_makefile 'json/ext/generator'

ext/json/ext/generator/generator.c

Lines changed: 6 additions & 58 deletions
Original file line number | Diff line number | Diff line change
@@ -5,7 +5,7 @@
55
#include <math.h>
66
#include <ctype.h>
77

8-
#include "simd.h"
8+
#include "../simd/simd.h"
99

1010
/* ruby api and some helpers */
1111

@@ -304,28 +304,6 @@ static inline FORCE_INLINE unsigned char neon_next_match(search_state *search)
304304
return 1;
305305
}
306306

307-
// See: https://community.arm.com/arm-community-blogs/b/servers-and-cloud-computing-blog/posts/porting-x86-vector-bitmask-optimizations-to-arm-neon
308-
static inline FORCE_INLINE uint64_t neon_match_mask(uint8x16_t matches)
309-
{
310-
const uint8x8_t res = vshrn_n_u16(vreinterpretq_u16_u8(matches), 4);
311-
const uint64_t mask = vget_lane_u64(vreinterpret_u64_u8(res), 0);
312-
return mask & 0x8888888888888888ull;
313-
}
314-
315-
static inline FORCE_INLINE uint64_t neon_rules_update(const char *ptr)
316-
{
317-
uint8x16_t chunk = vld1q_u8((const unsigned char *)ptr);
318-
319-
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
320-
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
321-
const uint8x16_t too_low_or_dbl_quote = vcltq_u8(veorq_u8(chunk, vdupq_n_u8(2)), vdupq_n_u8(33));
322-
323-
uint8x16_t has_backslash = vceqq_u8(chunk, vdupq_n_u8('\\'));
324-
uint8x16_t needs_escape = vorrq_u8(too_low_or_dbl_quote, has_backslash);
325-
326-
return neon_match_mask(needs_escape);
327-
}
328-
329307
static inline unsigned char search_escape_basic_neon(search_state *search)
330308
{
331309
if (RB_UNLIKELY(search->has_matches)) {
@@ -380,14 +358,8 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
380358
* no bytes need to be escaped and we can continue to the next chunk. If the mask is not 0 then we
381359
* have at least one byte that needs to be escaped.
382360
*/
383-
while (search->ptr + sizeof(uint8x16_t) <= search->end) {
384-
uint64_t mask = neon_rules_update(search->ptr);
385361

386-
if (!mask) {
387-
search->ptr += sizeof(uint8x16_t);
388-
continue;
389-
}
390-
search->matches_mask = mask;
362+
if (string_scan_simd_neon(&search->ptr, search->end, &search->matches_mask)) {
391363
search->has_matches = true;
392364
search->chunk_base = search->ptr;
393365
search->chunk_end = search->ptr + sizeof(uint8x16_t);
@@ -399,7 +371,7 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
399371
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
400372
char *s = copy_remaining_bytes(search, sizeof(uint8x16_t), remaining);
401373

402-
uint64_t mask = neon_rules_update(s);
374+
uint64_t mask = compute_chunk_mask_neon(s);
403375

404376
if (!mask) {
405377
// Nothing to escape, ensure search_flush doesn't do anything by setting
@@ -428,11 +400,6 @@ static inline unsigned char search_escape_basic_neon(search_state *search)
428400

429401
#ifdef HAVE_SIMD_SSE2
430402

431-
#define _mm_cmpge_epu8(a, b) _mm_cmpeq_epi8(_mm_max_epu8(a, b), a)
432-
#define _mm_cmple_epu8(a, b) _mm_cmpge_epu8(b, a)
433-
#define _mm_cmpgt_epu8(a, b) _mm_xor_si128(_mm_cmple_epu8(a, b), _mm_set1_epi8(-1))
434-
#define _mm_cmplt_epu8(a, b) _mm_cmpgt_epu8(b, a)
435-
436403
static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
437404
{
438405
int mask = search->matches_mask;
@@ -457,18 +424,6 @@ static inline FORCE_INLINE unsigned char sse2_next_match(search_state *search)
457424
#define TARGET_SSE2
458425
#endif
459426

460-
static inline TARGET_SSE2 FORCE_INLINE int sse2_update(const char *ptr)
461-
{
462-
__m128i chunk = _mm_loadu_si128((__m128i const*)ptr);
463-
464-
// Trick: c < 32 || c == 34 can be factored as c ^ 2 < 33
465-
// https://lemire.me/blog/2025/04/13/detect-control-characters-quotes-and-backslashes-efficiently-using-swar/
466-
__m128i too_low_or_dbl_quote = _mm_cmplt_epu8(_mm_xor_si128(chunk, _mm_set1_epi8(2)), _mm_set1_epi8(33));
467-
__m128i has_backslash = _mm_cmpeq_epi8(chunk, _mm_set1_epi8('\\'));
468-
__m128i needs_escape = _mm_or_si128(too_low_or_dbl_quote, has_backslash);
469-
return _mm_movemask_epi8(needs_escape);
470-
}
471-
472427
static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(search_state *search)
473428
{
474429
if (RB_UNLIKELY(search->has_matches)) {
@@ -487,17 +442,10 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
487442
}
488443
}
489444

490-
while (search->ptr + sizeof(__m128i) <= search->end) {
491-
int needs_escape_mask = sse2_update(search->ptr);
492-
493-
if (needs_escape_mask == 0) {
494-
search->ptr += sizeof(__m128i);
495-
continue;
496-
}
497-
445+
if (string_scan_simd_sse2(&search->ptr, search->end, &search->matches_mask)) {
498446
search->has_matches = true;
499-
search->matches_mask = needs_escape_mask;
500447
search->chunk_base = search->ptr;
448+
search->chunk_end = search->ptr + sizeof(__m128i);
501449
return sse2_next_match(search);
502450
}
503451

@@ -506,7 +454,7 @@ static inline TARGET_SSE2 FORCE_INLINE unsigned char search_escape_basic_sse2(se
506454
if (remaining >= SIMD_MINIMUM_THRESHOLD) {
507455
char *s = copy_remaining_bytes(search, sizeof(__m128i), remaining);
508456

509-
int needs_escape_mask = sse2_update(s);
457+
int needs_escape_mask = compute_chunk_mask_sse2(s);
510458

511459
if (needs_escape_mask == 0) {
512460
// Nothing to escape, ensure search_flush doesn't do anything by setting

ext/json/ext/generator/simd.h

Lines changed: 0 additions & 112 deletions
This file was deleted.

ext/json/ext/parser/depend

Lines changed: 1 addition & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -1 +1,2 @@
11
parser.o: parser.c $(srcdir)/../fbuffer/fbuffer.h
2+
parser.o: parser.c $(srcdir)/../simd/simd.h

ext/json/ext/parser/extconf.rb

Lines changed: 4 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -8,4 +8,8 @@
88

99
append_cflags("-std=c99")
1010

11+
if enable_config('parser-use-simd', default=!ENV["JSON_DISABLE_SIMD"])
12+
require_relative "../simd/conf.rb"
13+
end
14+
1115
create_makefile 'json/ext/parser'

0 commit comments

Comments
 (0)