Skip to content

Commit 34990ab

Browse files
committed
Handle integers bigger than 64 bits
The ability to handle arbitrary-size integers is quite important for being able to replace the stdlib library. Unfortunately, RapidJSON isn't very flexible here. For generating JSON, it's quite easy to fall back to `Integer#to_s`. For parsing, however, the only escape hatch is to parse all integers ourselves, which significantly impacts performance. It could be a bit better by parsing in place, but all Ruby functions expect null-terminated C strings, so it wouldn't work well. Ruby's number parsing is also not as fast as RapidJSON's. So for this to be performant, RapidJSON would need a flag that allows us to parse only over-sized integers ourselves, rather than all numbers. But that would require a new feature upstream.
1 parent 28cb818 commit 34990ab

File tree

5 files changed

+54
-17
lines changed

5 files changed

+54
-17
lines changed

ext/rapidjson/cext.cc

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -8,6 +8,8 @@ static VALUE rb_mRapidjson;
88
static VALUE rb_eParseError;
99
static VALUE rb_eEncodeError;
1010

11+
static VALUE rb_LLONG_MIN = Qnil, rb_ULLONG_MAX = Qnil;
12+
1113
static ID id_to_json;
1214
static ID id_to_s;
1315

@@ -36,7 +38,8 @@ parse(VALUE _self, VALUE string) {
3638
Reader reader;
3739
char *cstring = StringValueCStr(string); // fixme?
3840
StringStream ss(cstring);
39-
ParseResult ok = reader.Parse(ss, handler);
41+
// TODO: rapidjson::kParseInsituFlag ?
42+
ParseResult ok = reader.Parse<rapidjson::kParseNumbersAsStringsFlag>(ss, handler);
4043

4144
if (!ok) {
4245
rb_raise(rb_eParseError, "JSON parse error: %s (%lu)",
@@ -67,6 +70,12 @@ Init_rapidjson(void)
6770
id_to_s = rb_intern("to_s");
6871
id_to_json = rb_intern("to_json");
6972

73+
rb_global_variable(&rb_LLONG_MIN);
74+
rb_global_variable(&rb_ULLONG_MAX);
75+
76+
rb_LLONG_MIN = LL2NUM(LLONG_MIN);
77+
rb_ULLONG_MAX = ULL2NUM(ULLONG_MAX);
78+
7079
rb_mRapidjson = rb_define_module("RapidJSON");
7180
rb_define_module_function(rb_mRapidjson, "encode", encode, 1);
7281
rb_define_module_function(rb_mRapidjson, "pretty_encode", pretty_encode, 1);

ext/rapidjson/encoder.hh

Lines changed: 15 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -67,12 +67,24 @@ class RubyObjectEncoder {
6767

6868
bool negative = rb_big_cmp(b, INT2FIX(0)) == INT2FIX(-1);
6969
if (negative) {
70-
long long ll = rb_big2ll(b);
71-
writer.Int64(ll);
72-
} else {
70+
if (rb_big_cmp(b, rb_LLONG_MIN) != INT2FIX(-1)) {
71+
long long ll = rb_big2ll(b);
72+
writer.Int64(ll);
73+
return;
74+
}
75+
} else if (rb_big_cmp(b, rb_ULLONG_MAX) == INT2FIX(-1)) {
7376
unsigned long long ull = rb_big2ull(b);
7477
writer.Uint64(ull);
78+
return;
7579
}
80+
81+
// If the number is too big, we go through Integer#to_s
82+
VALUE str = rb_funcall(b, id_to_s, 0);
83+
Check_Type(str, T_STRING);
84+
85+
// We should be able to use RawNumber here, but it's buggy
86+
// https://github.com/Tencent/rapidjson/issues/852
87+
writer.RawValue(RSTRING_PTR(str), RSTRING_LEN(str), kNumberType);
7688
}
7789

7890
void encode_float(VALUE v) {

ext/rapidjson/parser.hh

Lines changed: 16 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -33,20 +33,23 @@ struct RubyObjectHandler : public BaseReaderHandler<UTF8<>, RubyObjectHandler> {
3333
return PutValue(b ? Qtrue : Qfalse);
3434
}
3535

36-
bool Int(int i) {
37-
return PutValue(INT2FIX(i));
38-
}
39-
40-
bool Uint(unsigned u) {
41-
return PutValue(INT2FIX(u));
42-
}
43-
44-
bool Int64(int64_t i) {
45-
return PutValue(RB_LONG2NUM(i));
46-
}
36+
bool RawNumber(const char *str, SizeType length, bool copy) {
37+
// TODO: rapidjson::kParseInsituFlag ?
38+
// char tmp_string[length + 1];
39+
// memcpy(tmp_string, str, length);
40+
// tmp_string[length] = '\0';
41+
42+
SizeType index = 0;
43+
if (str[0] == '-') {
44+
index++;
45+
}
46+
for (; index < length; index++) {
47+
if (!isdigit(str[index])) {
48+
return Double(rb_cstr_to_dbl(str, false));
49+
}
50+
}
4751

48-
bool Uint64(uint64_t u) {
49-
return PutValue(RB_ULONG2NUM(u));
52+
return PutValue(rb_cstr2inum(str, 10));
5053
}
5154

5255
bool Double(double d) {

test/test_encoder.rb

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,10 @@ def test_encode_bignum
3333
assert_equal "18446744073709551615", encode(2**64 - 1)
3434
end
3535

36+
def test_encore_arbitrary_size_num
37+
assert_equal "340282366920938463463374607431768211456", encode(2**128)
38+
end
39+
3640
def test_encode_fixnum_exponents
3741
tests = []
3842
0.upto(65) do |exponent|

test/test_parser.rb

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,15 @@ def test_parse_string
5959
assert_equal "abcdefghijklmnopqrstuvwxyz", parse('"abcdefghijklmnopqrstuvwxyz"')
6060
end
6161

62+
def test_parse_bignum
63+
assert_equal 340282366920938463463374607431768211456, parse("340282366920938463463374607431768211456")
64+
end
65+
66+
def test_parse_huge_floats
67+
assert_equal 34028236692093846.3463374607431768211456, parse("34028236692093846.3463374607431768211456")
68+
assert_equal 0.0, parse("123.456e-789")
69+
end
70+
6271
def test_parse_invalida
6372
ex = assert_raises RapidJSON::ParseError do
6473
parse("abc")

0 commit comments

Comments
 (0)