Skip to content

Commit 456ef91

Browse files
samyronmatzbot
authored andcommitted
[ruby/json] Use __builtin_memcpy, if available, to copy overlapping byte ranges in copy_remaining_bytes to avoid a branch to MEMCPY. Additionally use a space as padding byte instead of an 'X' so it can be represented diretly on AArch64 with a single instruction.
ruby/json@643ee11fed
1 parent bc6c895 commit 456ef91

File tree

3 files changed

+50
-4
lines changed

3 files changed

+50
-4
lines changed

ext/json/generator/generator.c

Lines changed: 33 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -297,13 +297,43 @@ ALWAYS_INLINE(static) char *copy_remaining_bytes(search_state *search, unsigned
297297
char *s = (buf->ptr + buf->len);
298298

299299
// Pad the buffer with dummy characters that won't need escaping.
300-
// This seem wateful at first sight, but memset of vector length is very fast.
301-
memset(s, 'X', vec_len);
300+
// This seem wasteful at first sight, but memset of vector length is very fast.
301+
// This is a space as it can be directly represented as an immediate on AArch64.
302+
memset(s, ' ', vec_len);
302303

303304
// Optimistically copy the remaining 'len' characters to the output FBuffer. If there are no characters
304305
// to escape, then everything ends up in the correct spot. Otherwise it was convenient temporary storage.
305-
MEMCPY(s, search->ptr, char, len);
306+
#if defined(__has_builtin) && __has_builtin(__builtin_memcpy)
307+
308+
#ifdef RBIMPL_ASSERT_OR_ASSUME
309+
RBIMPL_ASSERT_OR_ASSUME(len < 16);
310+
#endif
306311

312+
if (vec_len == 16 && len >= 4) {
313+
// If __builtin_memcpy is available, use it to copy between SIMD_MINIMUM_THRESHOLD and vec_len-1 bytes.
314+
// These copies overlap. The first copy will copy the first 8 (or 4) bytes. The second copy will copy
315+
// the last 8 (or 4) bytes but overlap with the first copy. The overlapping bytes will be in the correct
316+
// position in both copies.
317+
318+
// Please do not attempt to replace __builtin_memcpy with memcpy without profiling and/or looking at the
319+
// generated assembly. On clang-specifically (tested on Apple clang version 17.0.0 (clang-1700.0.13.3)),
320+
// when using memcpy, the compiler will notice the only difference is a 4 or 8 and generate a conditional
321+
// select instruction instead of direct loads and stores with a branch. This ends up slower than the branch
322+
// plus two loads and stores generated when using __builtin_memcpy.
323+
if (len >= 8) {
324+
__builtin_memcpy(s, search->ptr, 8);
325+
__builtin_memcpy(s + len - 8, search->ptr + len - 8, 8);
326+
} else {
327+
__builtin_memcpy(s, search->ptr, 4);
328+
__builtin_memcpy(s + len - 4, search->ptr + len - 4, 4);
329+
}
330+
} else {
331+
MEMCPY(s, search->ptr, char, len);
332+
}
333+
#else
334+
MEMCPY(s, search->ptr, char, len);
335+
#endif
336+
307337
return s;
308338
}
309339

ext/json/simd/simd.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -58,7 +58,7 @@ static inline int trailing_zeros(int input)
5858

5959
#ifdef JSON_ENABLE_SIMD
6060

61-
#define SIMD_MINIMUM_THRESHOLD 6
61+
#define SIMD_MINIMUM_THRESHOLD 4
6262

6363
#if defined(__ARM_NEON) || defined(__ARM_NEON__) || defined(__aarch64__) || defined(_M_ARM64)
6464
#include <arm_neon.h>

test/json/json_generator_test.rb

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -650,6 +650,22 @@ def test_backslash
650650
json = '"\\nabc"'
651651
assert_equal json, generate(data)
652652
#
653+
data = "\n"
654+
json = '"\\n"'
655+
assert_equal json, generate(data)
656+
#
657+
(0..16).each do |i|
658+
data = ('a' * i) + "\n"
659+
json = '"' + ('a' * i) + '\\n"'
660+
assert_equal json, generate(data)
661+
end
662+
#
663+
(0..16).each do |i|
664+
data = "\n" + ('a' * i)
665+
json = '"' + '\\n' + ('a' * i) + '"'
666+
assert_equal json, generate(data)
667+
end
668+
#
653669
data = ["'"]
654670
json = '["\\\'"]'
655671
assert_equal '["\'"]', generate(data)

0 commit comments

Comments
 (0)