diff --git a/.gitattributes b/.gitattributes new file mode 100644 index 000000000..caefad87e --- /dev/null +++ b/.gitattributes @@ -0,0 +1,2 @@ +ext/json/ext/parser/parser.c linguist-generated=true +java/src/json/ext/Parser.java linguist-generated=true diff --git a/.github/dependabot.yml b/.github/dependabot.yml new file mode 100644 index 000000000..6778b0493 --- /dev/null +++ b/.github/dependabot.yml @@ -0,0 +1,6 @@ +version: 2 +updates: + - package-ecosystem: 'github-actions' + directory: '/' + schedule: + interval: 'daily' diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 27171b90d..d4c981958 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -35,7 +35,7 @@ jobs: - { os: windows-latest, ruby: jruby-head } steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Ruby uses: ruby/setup-ruby-pkgs@v1 @@ -64,7 +64,7 @@ jobs: fail-fast: false steps: - - uses: actions/checkout@v3 + - uses: actions/checkout@v4 - name: Set up Ruby uses: ruby/setup-ruby-pkgs@v1 diff --git a/CHANGES.md b/CHANGES.md index 071251825..f9efe041d 100644 --- a/CHANGES.md +++ b/CHANGES.md @@ -1,12 +1,27 @@ # Changes -### UNRELEASED +### 2024-11-14 (2.8.2) + +* `JSON.load_file` explicitly reads the file as UTF-8. + +### 2024-11-06 (2.8.1) + +* Fix the java packages to include the extension. + +### 2024-11-06 (2.8.0) * Emit a deprecation warning when `JSON.load` create custom types without the `create_additions` option being explictly enabled. * Prefer to use `JSON.unsafe_load(string)` or `JSON.load(string, create_additions: true)`. * Emit a deprecation warning when serializing valid UTF-8 strings encoded in `ASCII_8BIT` aka `BINARY`. -* Bump required_ruby_version to 2.7. -* More performance improvments to `JSON.dump` and `JSON.generate`. +* Bump required Ruby version to 2.7. +* Add support for optionally parsing trailing commas, via `allow_trailing_comma: true`, which in conjunction with the + pre-existing support for comments, makes it suitable to parse `jsonc` documents. +* Many performance improvements to `JSON.parse` and `JSON.load`, up to `1.7x` faster on real-world documents. +* Some minor performance improvements to `JSON.dump` and `JSON.generate`. + +### 2024-11-04 (2.7.6) + +* Fix a regression in JSON.generate when dealing with Hash keys that are string subclasses: call `to_json` on them. 
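The 2.8.0 entries above describe the new opt-in trailing-comma parsing. A minimal usage sketch (illustrative only; it assumes the `allow_trailing_comma` option named in the changelog and the parser's pre-existing comment support, and is not part of this patch):

```ruby
require "json"

jsonc = <<~JSONC
  {
    /* comments were already tolerated by the parser */
    "name": "config",
    "values": [1, 2, 3,],
  }
JSONC

JSON.parse(jsonc, allow_trailing_comma: true)
#=> {"name"=>"config", "values"=>[1, 2, 3]}
```

Trailing commas stay rejected by default, so strict parsing is unchanged unless the option is passed.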
### 2024-10-25 (2.7.5) diff --git a/Rakefile b/Rakefile index c5b518a1c..09b69a2e5 100644 --- a/Rakefile +++ b/Rakefile @@ -161,7 +161,7 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby' file JRUBY_PARSER_JAR => :compile do cd 'java/src' do parser_classes = FileList[ - "json/ext/ByteListTranscoder*.class", + "json/ext/ByteList*.class", "json/ext/OptionsReader*.class", "json/ext/Parser*.class", "json/ext/RuntimeInfo*.class", @@ -179,7 +179,7 @@ if defined?(RUBY_ENGINE) and RUBY_ENGINE == 'jruby' file JRUBY_GENERATOR_JAR => :compile do cd 'java/src' do generator_classes = FileList[ - "json/ext/ByteListTranscoder*.class", + "json/ext/ByteList*.class", "json/ext/OptionsReader*.class", "json/ext/Generator*.class", "json/ext/RuntimeInfo*.class", diff --git a/benchmark/encoder.rb b/benchmark/encoder.rb index 39d3e9061..acc5fa07b 100644 --- a/benchmark/encoder.rb +++ b/benchmark/encoder.rb @@ -17,7 +17,6 @@ def implementations(ruby_obj) state = JSON::State.new(JSON.dump_default_options) { - json_state: ["json (reuse)", proc { state.generate(ruby_obj) }], json: ["json", proc { JSON.generate(ruby_obj) }], oj: ["oj", proc { Oj.dump(ruby_obj) }], } @@ -58,27 +57,24 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [ # NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] # On the first two micro benchmarks, the limitting factor is the fixed cost of initializing the -# generator state. Since `JSON.generate` now lazily allocate the `State` object we're now ~10% faster +# generator state. Since `JSON.generate` now lazily allocates the `State` object we're now ~10-20% faster # than `Oj.dump`. benchmark_encoding "small mixed", [1, "string", { a: 1, b: 2 }, [3, 4, 5]] benchmark_encoding "small nested array", [[1,2,3,4,5]]*10 - -# On small hash specifically, we're just on par with `Oj.dump`. Would be worth investigating why -# Hash serialization doesn't perform as well as other types. benchmark_encoding "small hash", { "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" } -# On string encoding we're ~20% faster when dealing with mostly ASCII, but ~10% slower when dealing -# with mostly multi-byte characters. This is a tradeoff. -benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500), except: %i(json_state) -benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500), except: %i(json_state) +# On string encoding we're ~20% faster when dealing with mostly ASCII, but ~50% slower when dealing +# with mostly multi-byte characters. There are likely some gains left to be had in multi-byte handling. +benchmark_encoding "mixed utf8", ([("a" * 5000) + "€" + ("a" * 5000)] * 500) +benchmark_encoding "mostly utf8", ([("€" * 3333)] * 500) # On these benchmarks we perform well, we're on par or better. benchmark_encoding "integers", (1_000_000..1_001_000).to_a, except: %i(json_state) -benchmark_encoding "activitypub.json", JSON.load_file("#{__dir__}/data/activitypub.json"), except: %i(json_state) -benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json"), except: %i(json_state) +benchmark_encoding "activitypub.json", JSON.load_file("#{__dir__}/data/activitypub.json") +benchmark_encoding "citm_catalog.json", JSON.load_file("#{__dir__}/data/citm_catalog.json") -# On twitter.json we're still about 10% slower, this is worth investigating. 
-benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json"), except: %i(json_state) +# On twitter.json we're still about 6% slower, this is worth investigating. +benchmark_encoding "twitter.json", JSON.load_file("#{__dir__}/data/twitter.json") # This benchmark spent the overwhelming majority of its time in `ruby_dtoa`. We rely on Ruby's implementation # which uses a relatively old version of dtoa.c from David M. Gay. @@ -89,8 +85,8 @@ def benchmark_encoding(benchmark_name, ruby_obj, check_expected: true, except: [ # but all these are implemented in C++11 or newer, making it hard if not impossible to include them. # Short of a pure C99 implementation of these newer algorithms, there isn't much that can be done to match # Oj speed without losing precision. -benchmark_encoding "canada.json", JSON.load_file("#{__dir__}/data/canada.json"), check_expected: false, except: %i(json_state) +benchmark_encoding "canada.json", JSON.load_file("#{__dir__}/data/canada.json"), check_expected: false # We're about 10% faster when `to_json` calls are involved, but this wasn't particularly optimized, there might be # opportunities here. -benchmark_encoding "many #to_json calls", [{object: Object.new, int: 12, float: 54.3, class: Float, time: Time.now, date: Date.today}] * 20, except: %i(json_state) +benchmark_encoding "many #to_json calls", [{object: Object.new, int: 12, float: 54.3, class: Float, time: Time.now, date: Date.today}] * 20 diff --git a/benchmark/parser.rb b/benchmark/parser.rb index 4425728a4..bacb8e9e6 100644 --- a/benchmark/parser.rb +++ b/benchmark/parser.rb @@ -19,7 +19,7 @@ def benchmark_parsing(name, json_output) Benchmark.ips do |x| x.report("json") { JSON.parse(json_output) } if RUN[:json] x.report("oj") { Oj.load(json_output) } if RUN[:oj] - x.report("Oj::Parser") { Oj::Parser.usual.parse(json_output) } if RUN[:oj] + x.report("Oj::Parser") { Oj::Parser.new(:usual).parse(json_output) } if RUN[:oj] x.report("rapidjson") { RapidJSON.parse(json_output) } if RUN[:rapidjson] x.compare!(order: :baseline) end @@ -28,27 +28,22 @@ def benchmark_parsing(name, json_output) # NB: Notes are based on ruby 3.3.4 (2024-07-09 revision be1089c8ec) +YJIT [arm64-darwin23] -# Oj::Parser is very significanly faster (1.80x) on the nested array benchmark. benchmark_parsing "small nested array", JSON.dump([[1,2,3,4,5]]*10) - -# Oj::Parser is significanly faster (~1.5x) on the next 4 benchmarks in large part because its -# cache is persisted across calls. That's not something we can do with the current API, we'd -# need to expose a stateful API as well, but that's no really desirable. -# Other than that we're faster than regular `Oj.load` by a good margin. 
benchmark_parsing "small hash", JSON.dump({ "username" => "jhawthorn", "id" => 123, "event" => "wrote json serializer" }) - benchmark_parsing "test from oj", <len = 0; } -#endif + +static void fbuffer_flush(FBuffer *fb) +{ + rb_io_write(fb->io, rb_utf8_str_new(fb->ptr, fb->len)); + fbuffer_clear(fb); +} static void fbuffer_do_inc_capa(FBuffer *fb, unsigned long requested) { + if (RB_UNLIKELY(fb->io)) { + fbuffer_flush(fb); + if (RB_LIKELY(requested < fb->capa)) { + return; + } + } + unsigned long required; if (RB_UNLIKELY(!fb->ptr)) { @@ -174,11 +186,18 @@ static void fbuffer_append_long(FBuffer *fb, long number) fbuffer_append(fb, buffer_end - len, len); } -static VALUE fbuffer_to_s(FBuffer *fb) +static VALUE fbuffer_finalize(FBuffer *fb) { - VALUE result = rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb)); - fbuffer_free(fb); - return result; + if (fb->io) { + fbuffer_flush(fb); + fbuffer_free(fb); + rb_io_flush(fb->io); + return fb->io; + } else { + VALUE result = rb_utf8_str_new(FBUFFER_PTR(fb), FBUFFER_LEN(fb)); + fbuffer_free(fb); + return result; + } } #endif #endif diff --git a/ext/json/ext/generator/generator.c b/ext/json/ext/generator/generator.c index 80539af6c..503baca65 100644 --- a/ext/json/ext/generator/generator.c +++ b/ext/json/ext/generator/generator.c @@ -54,7 +54,7 @@ struct generate_json_data { }; static VALUE cState_from_state_s(VALUE self, VALUE opts); -static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func); +static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func, VALUE io); static void generate_json(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); static void generate_json_object(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); static void generate_json_array(FBuffer *buffer, struct generate_json_data *data, JSON_Generator_State *state, VALUE obj); @@ -118,8 +118,8 @@ static void convert_UTF8_to_JSON(FBuffer *out_buffer, VALUE str, const char esca case '\r': fbuffer_append(out_buffer, "\\r", 2); break; case '\t': fbuffer_append(out_buffer, "\\t", 2); break; default: { - scratch[2] = hexdig[ch >> 12]; - scratch[3] = hexdig[(ch >> 8) & 0xf]; + scratch[2] = '0'; + scratch[3] = '0'; scratch[4] = hexdig[(ch >> 4) & 0xf]; scratch[5] = hexdig[ch & 0xf]; fbuffer_append(out_buffer, scratch, 6); @@ -240,8 +240,8 @@ static void convert_ASCII_to_JSON(FBuffer *out_buffer, VALUE str, const char esc case '\r': fbuffer_append(out_buffer, "\\r", 2); break; case '\t': fbuffer_append(out_buffer, "\\t", 2); break; default: - scratch[2] = hexdig[ch >> 12]; - scratch[3] = hexdig[(ch >> 8) & 0xf]; + scratch[2] = '0'; + scratch[3] = '0'; scratch[4] = hexdig[(ch >> 4) & 0xf]; scratch[5] = hexdig[ch & 0xf]; fbuffer_append(out_buffer, scratch, 6); @@ -288,8 +288,8 @@ static void convert_UTF8_to_ASCII_only_JSON(FBuffer *out_buffer, VALUE str, cons case '\r': fbuffer_append(out_buffer, "\\r", 2); break; case '\t': fbuffer_append(out_buffer, "\\t", 2); break; default: { - scratch[2] = hexdig[ch >> 12]; - scratch[3] = hexdig[(ch >> 8) & 0xf]; + scratch[2] = '0'; + scratch[3] = '0'; scratch[4] = hexdig[(ch >> 4) & 0xf]; scratch[5] = hexdig[ch & 0xf]; fbuffer_append(out_buffer, scratch, 6); @@ -453,7 +453,7 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); VALUE Vstate = cState_from_state_s(cState, argc == 1 ? 
argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_object); + return cState_partial_generate(Vstate, self, generate_json_object, Qfalse); } /* @@ -467,7 +467,7 @@ static VALUE mHash_to_json(int argc, VALUE *argv, VALUE self) static VALUE mArray_to_json(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_array); + return cState_partial_generate(Vstate, self, generate_json_array, Qfalse); } #ifdef RUBY_INTEGER_UNIFICATION @@ -480,7 +480,7 @@ static VALUE mInteger_to_json(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_integer); + return cState_partial_generate(Vstate, self, generate_json_integer, Qfalse); } #else @@ -493,7 +493,7 @@ static VALUE mFixnum_to_json(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_fixnum); + return cState_partial_generate(Vstate, self, generate_json_fixnum, Qfalse); } /* @@ -505,7 +505,7 @@ static VALUE mBignum_to_json(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_bignum); + return cState_partial_generate(Vstate, self, generate_json_bignum, Qfalse); } #endif @@ -518,7 +518,7 @@ static VALUE mFloat_to_json(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_float); + return cState_partial_generate(Vstate, self, generate_json_float, Qfalse); } /* @@ -543,7 +543,7 @@ static VALUE mString_to_json(int argc, VALUE *argv, VALUE self) { rb_check_arity(argc, 0, 1); VALUE Vstate = cState_from_state_s(cState, argc == 1 ? argv[0] : Qnil); - return cState_partial_generate(Vstate, self, generate_json_string); + return cState_partial_generate(Vstate, self, generate_json_string, Qfalse); } /* @@ -638,7 +638,7 @@ static VALUE mObject_to_json(int argc, VALUE *argv, VALUE self) rb_scan_args(argc, argv, "01", &state); Check_Type(string, T_STRING); state = cState_from_state_s(cState, state); - return cState_partial_generate(state, string, generate_json_string); + return cState_partial_generate(state, string, generate_json_string, Qfalse); } static void State_mark(void *ptr) @@ -1045,12 +1045,14 @@ static VALUE generate_json_rescue(VALUE d, VALUE exc) return Qundef; } -static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func) +static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func, VALUE io) { GET_STATE(self); char stack_buffer[FBUFFER_STACK_SIZE]; - FBuffer buffer = {0}; + FBuffer buffer = { + .io = RTEST(io) ? 
io : Qfalse, + }; fbuffer_stack_init(&buffer, state->buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE); struct generate_json_data data = { @@ -1062,19 +1064,12 @@ static VALUE cState_partial_generate(VALUE self, VALUE obj, generator_func func) }; rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); - return fbuffer_to_s(&buffer); + return fbuffer_finalize(&buffer); } -/* - * call-seq: generate(obj) - * - * Generates a valid JSON document from object +obj+ and returns the - * result. If no valid JSON document can be created this method raises a - * GeneratorError exception. - */ -static VALUE cState_generate(VALUE self, VALUE obj) +static VALUE cState_generate(VALUE self, VALUE obj, VALUE io) { - VALUE result = cState_partial_generate(self, obj, generate_json); + VALUE result = cState_partial_generate(self, obj, generate_json, io); GET_STATE(self); (void)state; return result; @@ -1502,14 +1497,16 @@ static VALUE cState_configure(VALUE self, VALUE opts) return self; } -static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts) +static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts, VALUE io) { JSON_Generator_State state = {0}; state_init(&state); configure_state(&state, opts); char stack_buffer[FBUFFER_STACK_SIZE]; - FBuffer buffer = {0}; + FBuffer buffer = { + .io = RTEST(io) ? io : Qfalse, + }; fbuffer_stack_init(&buffer, state.buffer_initial_length, stack_buffer, FBUFFER_STACK_SIZE); struct generate_json_data data = { @@ -1521,7 +1518,7 @@ static VALUE cState_m_generate(VALUE klass, VALUE obj, VALUE opts) }; rb_rescue(generate_json_try, (VALUE)&data, generate_json_rescue, (VALUE)&data); - return fbuffer_to_s(&buffer); + return fbuffer_finalize(&buffer); } /* @@ -1583,9 +1580,9 @@ void Init_generator(void) rb_define_method(cState, "depth=", cState_depth_set, 1); rb_define_method(cState, "buffer_initial_length", cState_buffer_initial_length, 0); rb_define_method(cState, "buffer_initial_length=", cState_buffer_initial_length_set, 1); - rb_define_method(cState, "generate", cState_generate, 1); + rb_define_private_method(cState, "_generate", cState_generate, 2); - rb_define_singleton_method(cState, "generate", cState_m_generate, 2); + rb_define_singleton_method(cState, "generate", cState_m_generate, 3); VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods"); diff --git a/ext/json/ext/parser/extconf.rb b/ext/json/ext/parser/extconf.rb index f9104de12..4c1ac52a7 100644 --- a/ext/json/ext/parser/extconf.rb +++ b/ext/json/ext/parser/extconf.rb @@ -5,6 +5,7 @@ have_func("rb_hash_new_capa", "ruby.h") # RUBY_VERSION >= 3.2 have_func("rb_gc_mark_locations", "ruby.h") # Missing on TruffleRuby have_func("rb_hash_bulk_insert", "ruby.h") # Missing on TruffleRuby +have_func("rb_category_warn", "ruby.h") # Missing on TruffleRuby append_cflags("-std=c99") diff --git a/ext/json/ext/parser/parser.c b/ext/json/ext/parser/parser.c index 382e21e12..b6252556b 100644 --- a/ext/json/ext/parser/parser.c +++ b/ext/json/ext/parser/parser.c @@ -17,6 +17,17 @@ static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbo static int binary_encindex; static int utf8_encindex; +#ifdef HAVE_RB_CATEGORY_WARN +# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) +#else +# define json_deprecated(message) rb_warn(message) +#endif + +static const char deprecated_create_additions_warning[] = + "JSON.load implicit support for `create_additions: true` is deprecated " + "and will be removed in 3.0, 
use JSON.unsafe_load or explicitly " + "pass `create_additions: true`"; + #ifndef HAVE_RB_GC_MARK_LOCATIONS // For TruffleRuby void rb_gc_mark_locations(const VALUE *start, const VALUE *end) @@ -383,6 +394,7 @@ typedef struct JSON_ParserStruct { VALUE decimal_class; VALUE match_string; FBuffer fbuffer; + int in_array; int max_nesting; bool allow_nan; bool allow_trailing_comma; @@ -411,8 +423,7 @@ static const rb_data_type_t JSON_Parser_type; static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); @@ -438,11 +449,11 @@ static void raise_parse_error(const char *format, const char *start) -#line 464 "parser.rl" +#line 475 "parser.rl" -#line 446 "parser.c" +#line 457 "parser.c" enum {JSON_object_start = 1}; enum {JSON_object_first_final = 32}; enum {JSON_object_error = 0}; @@ -450,7 +461,7 @@ enum {JSON_object_error = 0}; enum {JSON_object_en_main = 1}; -#line 504 "parser.rl" +#line 515 "parser.rl" #define PUSH(result) rvalue_stack_push(json->stack, result, &json->stack_handle, &json->stack) @@ -466,14 +477,14 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu long stack_head = json->stack->head; -#line 470 "parser.c" +#line 481 "parser.c" { cs = JSON_object_start; } -#line 519 "parser.rl" +#line 530 "parser.rl" -#line 477 "parser.c" +#line 488 "parser.c" { short _widec; if ( p == pe ) @@ -502,7 +513,7 @@ case 2: goto st2; goto st0; tr2: -#line 483 "parser.rl" +#line 494 "parser.rl" { char *np; json->parsing_name = true; @@ -518,7 +529,7 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 522 "parser.c" +#line 533 "parser.c" switch( (*p) ) { case 13: goto st3; case 32: goto st3; @@ -585,7 +596,7 @@ case 8: goto st8; goto st0; tr11: -#line 472 "parser.rl" +#line 483 "parser.rl" { char *np = JSON_parse_value(json, p, pe, result, current_nesting); if (np == NULL) { @@ -599,20 +610,20 @@ case 8: if ( ++p == pe ) goto _test_eof9; case 9: -#line 603 "parser.c" +#line 614 "parser.c" _widec = (*p); if ( (*p) < 13 ) { if ( (*p) > 9 ) { if ( 10 <= (*p) && (*p) <= 10 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 13 ) { @@ -620,26 +631,26 @@ case 9: if ( 32 <= (*p) && (*p) <= 32 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 44 ) { if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( 
-#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -660,14 +671,14 @@ case 9: goto st10; goto st0; tr4: -#line 494 "parser.rl" +#line 505 "parser.rl" { p--; {p++; cs = 32; goto _out;} } goto st32; st32: if ( ++p == pe ) goto _test_eof32; case 32: -#line 671 "parser.c" +#line 682 "parser.c" goto st0; st10: if ( ++p == pe ) @@ -769,13 +780,13 @@ case 20: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -794,20 +805,20 @@ case 21: if ( (*p) <= 41 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 42 ) { if ( 43 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -830,13 +841,13 @@ case 22: if ( 42 <= (*p) && (*p) <= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 46 ) { @@ -844,19 +855,19 @@ case 22: if ( 48 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -880,20 +891,20 @@ case 23: if ( (*p) <= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 10 ) { if ( 11 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 481 "parser.rl" +#line 492 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -1007,7 +1018,7 @@ case 31: _out: {} } -#line 520 "parser.rl" +#line 531 "parser.rl" if (cs >= JSON_object_first_final) { long count = json->stack->head - stack_head; @@ -1045,7 +1056,7 @@ case 31: VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { if (json->deprecated_create_additions) { - rb_warn("JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`"); + json_deprecated(deprecated_create_additions_warning); } *result = rb_funcall(klass, i_json_create, 1, *result); } @@ -1058,7 +1069,7 @@ case 31: } -#line 1062 "parser.c" +#line 1073 "parser.c" enum {JSON_value_start = 1}; enum {JSON_value_first_final = 29}; enum {JSON_value_error = 0}; @@ 
-1066,7 +1077,7 @@ enum {JSON_value_error = 0}; enum {JSON_value_en_main = 1}; -#line 655 "parser.rl" +#line 664 "parser.rl" static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1074,14 +1085,14 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul int cs = EVIL; -#line 1078 "parser.c" +#line 1089 "parser.c" { cs = JSON_value_start; } -#line 662 "parser.rl" +#line 671 "parser.rl" -#line 1085 "parser.c" +#line 1096 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1115,7 +1126,7 @@ case 1: cs = 0; goto _out; tr2: -#line 598 "parser.rl" +#line 609 "parser.rl" { char *np = JSON_parse_string(json, p, pe, result); if (np == NULL) { @@ -1127,7 +1138,7 @@ cs = 0; } goto st29; tr3: -#line 608 "parser.rl" +#line 619 "parser.rl" { char *np; if(pe > p + 8 && !strncmp(MinusInfinity, p, 9)) { @@ -1139,11 +1150,7 @@ cs = 0; raise_parse_error("unexpected token at '%s'", p); } } - np = JSON_parse_float(json, p, pe, result); - if (np != NULL) { - {p = (( np))-1;} - } - np = JSON_parse_integer(json, p, pe, result); + np = JSON_parse_number(json, p, pe, result); if (np != NULL) { {p = (( np))-1;} } @@ -1151,15 +1158,17 @@ cs = 0; } goto st29; tr7: -#line 630 "parser.rl" +#line 637 "parser.rl" { char *np; + json->in_array++; np = JSON_parse_array(json, p, pe, result, current_nesting + 1); + json->in_array--; if (np == NULL) { p--; {p++; cs = 29; goto _out;} } else {p = (( np))-1;} } goto st29; tr11: -#line 636 "parser.rl" +#line 645 "parser.rl" { char *np; np = JSON_parse_object(json, p, pe, result, current_nesting + 1); @@ -1167,7 +1176,7 @@ cs = 0; } goto st29; tr25: -#line 591 "parser.rl" +#line 602 "parser.rl" { if (json->allow_nan) { *result = CInfinity; @@ -1177,7 +1186,7 @@ cs = 0; } goto st29; tr27: -#line 584 "parser.rl" +#line 595 "parser.rl" { if (json->allow_nan) { *result = CNaN; @@ -1187,19 +1196,19 @@ cs = 0; } goto st29; tr31: -#line 578 "parser.rl" +#line 589 "parser.rl" { *result = Qfalse; } goto st29; tr34: -#line 575 "parser.rl" +#line 586 "parser.rl" { *result = Qnil; } goto st29; tr37: -#line 581 "parser.rl" +#line 592 "parser.rl" { *result = Qtrue; } @@ -1208,9 +1217,9 @@ cs = 0; if ( ++p == pe ) goto _test_eof29; case 29: -#line 642 "parser.rl" +#line 651 "parser.rl" { p--; {p++; cs = 29; goto _out;} } -#line 1214 "parser.c" +#line 1223 "parser.c" switch( (*p) ) { case 13: goto st29; case 32: goto st29; @@ -1451,7 +1460,7 @@ case 28: _out: {} } -#line 663 "parser.rl" +#line 672 "parser.rl" if (json->freeze) { OBJ_FREEZE(*result); @@ -1466,7 +1475,7 @@ case 28: } -#line 1470 "parser.c" +#line 1479 "parser.c" enum {JSON_integer_start = 1}; enum {JSON_integer_first_final = 3}; enum {JSON_integer_error = 0}; @@ -1474,122 +1483,72 @@ enum {JSON_integer_error = 0}; enum {JSON_integer_en_main = 1}; -#line 684 "parser.rl" +#line 693 "parser.rl" -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) +#define MAX_FAST_INTEGER_SIZE 18 +static inline VALUE fast_parse_integer(char *p, char *pe) { - int cs = EVIL; - - -#line 1486 "parser.c" - { - cs = JSON_integer_start; - } - -#line 691 "parser.rl" - json->memo = p; - -#line 1494 "parser.c" - { - if ( p == pe ) - goto _test_eof; - switch ( cs ) - { -case 1: - switch( (*p) ) { - case 45: goto st2; - case 48: goto st3; - } - if ( 49 <= (*p) && (*p) <= 57 ) - goto st5; - goto st0; -st0: -cs = 0; - goto _out; -st2: - if ( ++p == pe ) - goto _test_eof2; -case 2: - if ( (*p) == 48 ) - goto st3; - if ( 49 <= (*p) && (*p) <= 57 ) - goto st5; - 
goto st0; -st3: - if ( ++p == pe ) - goto _test_eof3; -case 3: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st0; - goto tr4; -tr4: -#line 681 "parser.rl" - { p--; {p++; cs = 4; goto _out;} } - goto st4; -st4: - if ( ++p == pe ) - goto _test_eof4; -case 4: -#line 1535 "parser.c" - goto st0; -st5: - if ( ++p == pe ) - goto _test_eof5; -case 5: - if ( 48 <= (*p) && (*p) <= 57 ) - goto st5; - goto tr4; - } - _test_eof2: cs = 2; goto _test_eof; - _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; - _test_eof5: cs = 5; goto _test_eof; + bool negative = false; + if (*p == '-') { + negative = true; + p++; + } - _test_eof: {} - _out: {} - } + long long memo = 0; + while (p < pe) { + memo *= 10; + memo += *p - '0'; + p++; + } -#line 693 "parser.rl" + if (negative) { + memo = -memo; + } + return LL2NUM(memo); +} - if (cs >= JSON_integer_first_final) { +static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) +{ long len = p - json->memo; - fbuffer_clear(&json->fbuffer); - fbuffer_append(&json->fbuffer, json->memo, len); - fbuffer_append_char(&json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); + if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { + *result = fast_parse_integer(json->memo, p); + } else { + fbuffer_clear(&json->fbuffer); + fbuffer_append(&json->fbuffer, json->memo, len); + fbuffer_append_char(&json->fbuffer, '\0'); + *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); + } return p + 1; - } else { - return NULL; - } } -#line 1569 "parser.c" +#line 1527 "parser.c" enum {JSON_float_start = 1}; -enum {JSON_float_first_final = 8}; +enum {JSON_float_first_final = 6}; enum {JSON_float_error = 0}; enum {JSON_float_en_main = 1}; -#line 718 "parser.rl" +#line 745 "parser.rl" -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) +static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; + bool is_float = false; -#line 1585 "parser.c" +#line 1544 "parser.c" { cs = JSON_float_start; } -#line 725 "parser.rl" +#line 753 "parser.rl" json->memo = p; -#line 1593 "parser.c" +#line 1552 "parser.c" { if ( p == pe ) goto _test_eof; @@ -1598,10 +1557,10 @@ static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *resul case 1: switch( (*p) ) { case 45: goto st2; - case 48: goto st3; + case 48: goto st6; } if ( 49 <= (*p) && (*p) <= 57 ) - goto st7; + goto st10; goto st0; st0: cs = 0; @@ -1611,24 +1570,42 @@ cs = 0; goto _test_eof2; case 2: if ( (*p) == 48 ) - goto st3; + goto st6; if ( 49 <= (*p) && (*p) <= 57 ) - goto st7; + goto st10; goto st0; -st3: +st6: if ( ++p == pe ) - goto _test_eof3; -case 3: + goto _test_eof6; +case 6: switch( (*p) ) { - case 46: goto st4; - case 69: goto st5; - case 101: goto st5; + case 45: goto st0; + case 46: goto tr8; + case 69: goto tr9; + case 101: goto tr9; } + if ( 48 <= (*p) && (*p) <= 57 ) + goto st0; + goto tr7; +tr7: +#line 737 "parser.rl" + { p--; {p++; cs = 7; goto _out;} } + goto st7; +st7: + if ( ++p == pe ) + goto _test_eof7; +case 7: +#line 1599 "parser.c" goto st0; -st4: +tr8: +#line 738 "parser.rl" + { is_float = true; } + goto st3; +st3: if ( ++p == pe ) - goto _test_eof4; -case 4: + goto _test_eof3; +case 3: +#line 1609 "parser.c" if ( 48 <= (*p) && (*p) <= 57 ) goto st8; goto st0; @@ -1637,87 +1614,86 @@ case 4: goto _test_eof8; case 8: switch( (*p) ) { - case 69: goto st5; - case 101: goto st5; + case 69: goto st4; + case 101: goto st4; } if ( (*p) > 46 ) { if ( 48 <= (*p) && (*p) <= 57 ) goto st8; } 
else if ( (*p) >= 45 ) goto st0; - goto tr9; + goto tr7; tr9: -#line 712 "parser.rl" - { p--; {p++; cs = 9; goto _out;} } - goto st9; -st9: - if ( ++p == pe ) - goto _test_eof9; -case 9: -#line 1658 "parser.c" - goto st0; -st5: +#line 738 "parser.rl" + { is_float = true; } + goto st4; +st4: if ( ++p == pe ) - goto _test_eof5; -case 5: + goto _test_eof4; +case 4: +#line 1635 "parser.c" switch( (*p) ) { - case 43: goto st6; - case 45: goto st6; + case 43: goto st5; + case 45: goto st5; } if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; + goto st9; goto st0; -st6: +st5: if ( ++p == pe ) - goto _test_eof6; -case 6: + goto _test_eof5; +case 5: if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; + goto st9; goto st0; -st10: +st9: if ( ++p == pe ) - goto _test_eof10; -case 10: + goto _test_eof9; +case 9: switch( (*p) ) { case 69: goto st0; case 101: goto st0; } if ( (*p) > 46 ) { if ( 48 <= (*p) && (*p) <= 57 ) - goto st10; + goto st9; } else if ( (*p) >= 45 ) goto st0; - goto tr9; -st7: + goto tr7; +st10: if ( ++p == pe ) - goto _test_eof7; -case 7: + goto _test_eof10; +case 10: switch( (*p) ) { - case 46: goto st4; - case 69: goto st5; - case 101: goto st5; + case 45: goto st0; + case 46: goto tr8; + case 69: goto tr9; + case 101: goto tr9; } if ( 48 <= (*p) && (*p) <= 57 ) - goto st7; - goto st0; + goto st10; + goto tr7; } _test_eof2: cs = 2; goto _test_eof; + _test_eof6: cs = 6; goto _test_eof; + _test_eof7: cs = 7; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; - _test_eof4: cs = 4; goto _test_eof; _test_eof8: cs = 8; goto _test_eof; - _test_eof9: cs = 9; goto _test_eof; + _test_eof4: cs = 4; goto _test_eof; _test_eof5: cs = 5; goto _test_eof; - _test_eof6: cs = 6; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; _test_eof10: cs = 10; goto _test_eof; - _test_eof7: cs = 7; goto _test_eof; _test_eof: {} _out: {} } -#line 727 "parser.rl" +#line 755 "parser.rl" if (cs >= JSON_float_first_final) { + if (!is_float) { + return JSON_decode_integer(json, p, result); + } VALUE mod = Qnil; ID method_id = 0; if (json->decimal_class) { @@ -1768,7 +1744,7 @@ case 7: -#line 1772 "parser.c" +#line 1748 "parser.c" enum {JSON_array_start = 1}; enum {JSON_array_first_final = 22}; enum {JSON_array_error = 0}; @@ -1776,7 +1752,7 @@ enum {JSON_array_error = 0}; enum {JSON_array_en_main = 1}; -#line 804 "parser.rl" +#line 835 "parser.rl" static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting) @@ -1789,14 +1765,14 @@ static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *resul long stack_head = json->stack->head; -#line 1793 "parser.c" +#line 1769 "parser.c" { cs = JSON_array_start; } -#line 816 "parser.rl" +#line 847 "parser.rl" -#line 1800 "parser.c" +#line 1776 "parser.c" { short _widec; if ( p == pe ) @@ -1836,7 +1812,7 @@ case 2: goto st2; goto st0; tr2: -#line 784 "parser.rl" +#line 815 "parser.rl" { VALUE v = Qnil; char *np = JSON_parse_value(json, p, pe, &v, current_nesting); @@ -1851,12 +1827,12 @@ case 2: if ( ++p == pe ) goto _test_eof3; case 3: -#line 1855 "parser.c" +#line 1831 "parser.c" _widec = (*p); if ( 44 <= (*p) && (*p) <= 44 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -1903,14 +1879,14 @@ case 7: goto st3; goto st7; tr4: -#line 796 "parser.rl" +#line 827 "parser.rl" { p--; {p++; cs = 22; goto _out;} } goto st22; st22: if ( ++p == pe ) goto _test_eof22; case 22: -#line 1914 "parser.c" +#line 1890 "parser.c" goto 
st0; st8: if ( ++p == pe ) @@ -1978,13 +1954,13 @@ case 13: if ( 10 <= (*p) && (*p) <= 10 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 13 ) { @@ -1992,19 +1968,19 @@ case 13: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 32 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2043,13 +2019,13 @@ case 14: if ( 47 <= (*p) && (*p) <= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2068,20 +2044,20 @@ case 15: if ( (*p) <= 41 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 42 ) { if ( 43 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2104,13 +2080,13 @@ case 16: if ( 42 <= (*p) && (*p) <= 42 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 46 ) { @@ -2118,19 +2094,19 @@ case 16: if ( 48 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) >= 47 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2154,20 +2130,20 @@ case 17: if ( (*p) <= 9 ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else if ( (*p) > 10 ) { if ( 11 <= (*p) ) { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } } else { _widec = (short)(128 + ((*p) - -128)); if ( -#line 794 "parser.rl" +#line 825 "parser.rl" json->allow_trailing_comma ) _widec += 256; } switch( _widec ) { @@ -2239,7 +2215,7 @@ case 21: _out: {} } -#line 817 "parser.rl" +#line 848 "parser.rl" if(cs >= JSON_array_first_final) { long count = json->stack->head - stack_head; @@ -2291,6 +2267,26 @@ static inline VALUE build_string(const char *start, const char *end, 
bool intern return result; } +static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) +{ + size_t bufferSize = stringEnd - string; + + if (is_name && json->in_array) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + + return build_string(string, stringEnd, intern, symbolize); +} + static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) { size_t bufferSize = stringEnd - string; @@ -2298,7 +2294,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE int unescape_len; char buf[4]; - if (is_name) { + if (is_name && json->in_array) { VALUE cached_key; if (RB_UNLIKELY(symbolize)) { cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); @@ -2312,7 +2308,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE } pe = memchr(p, '\\', bufferSize); - if (RB_LIKELY(pe == NULL)) { + if (RB_UNLIKELY(pe == NULL)) { return build_string(string, stringEnd, intern, symbolize); } @@ -2413,15 +2409,15 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE } -#line 2417 "parser.c" +#line 2413 "parser.c" enum {JSON_string_start = 1}; -enum {JSON_string_first_final = 8}; +enum {JSON_string_first_final = 9}; enum {JSON_string_error = 0}; enum {JSON_string_en_main = 1}; -#line 1008 "parser.rl" +#line 1071 "parser.rl" static int @@ -2442,15 +2438,15 @@ static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE match_string; -#line 2446 "parser.c" +#line 2442 "parser.c" { cs = JSON_string_start; } -#line 1028 "parser.rl" +#line 1091 "parser.rl" json->memo = p; -#line 2454 "parser.c" +#line 2450 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2475,47 +2471,56 @@ case 2: goto st0; goto st2; tr2: -#line 995 "parser.rl" +#line 1053 "parser.rl" + { + *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + {p = (( p + 1))-1;} + p--; + {p++; cs = 9; goto _out;} + } +#line 1046 "parser.rl" { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - if (NIL_P(*result)) { - p--; - {p++; cs = 8; goto _out;} - } else { - {p = (( p + 1))-1;} - } + {p = (( p + 1))-1;} + p--; + {p++; cs = 9; goto _out;} } -#line 1005 "parser.rl" - { p--; {p++; cs = 8; goto _out;} } - goto st8; -st8: + goto st9; +tr6: +#line 1046 "parser.rl" + { + *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + {p = (( p + 1))-1;} + p--; + {p++; cs = 9; goto _out;} + } + goto st9; +st9: if ( ++p == pe ) - goto _test_eof8; -case 8: -#line 2496 "parser.c" + goto _test_eof9; +case 9: +#line 2503 "parser.c" goto st0; st3: if ( ++p == pe ) goto _test_eof3; case 3: if ( (*p) == 117 ) - goto st4; + goto st5; if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) goto st0; - goto st2; + goto st4; st4: if ( ++p == pe ) goto _test_eof4; case 4: - if ( (*p) < 65 ) { - if ( 48 <= (*p) && (*p) <= 57 ) - goto st5; - } else if ( (*p) > 70 ) { - if ( 97 <= (*p) && (*p) <= 
102 ) - goto st5; - } else - goto st5; - goto st0; + switch( (*p) ) { + case 34: goto tr6; + case 92: goto st3; + } + if ( 0 <= (signed char)(*(p)) && (*(p)) <= 31 ) + goto st0; + goto st4; st5: if ( ++p == pe ) goto _test_eof5; @@ -2548,27 +2553,41 @@ case 6: case 7: if ( (*p) < 65 ) { if ( 48 <= (*p) && (*p) <= 57 ) - goto st2; + goto st8; } else if ( (*p) > 70 ) { if ( 97 <= (*p) && (*p) <= 102 ) - goto st2; + goto st8; } else - goto st2; + goto st8; + goto st0; +st8: + if ( ++p == pe ) + goto _test_eof8; +case 8: + if ( (*p) < 65 ) { + if ( 48 <= (*p) && (*p) <= 57 ) + goto st4; + } else if ( (*p) > 70 ) { + if ( 97 <= (*p) && (*p) <= 102 ) + goto st4; + } else + goto st4; goto st0; } _test_eof2: cs = 2; goto _test_eof; - _test_eof8: cs = 8; goto _test_eof; + _test_eof9: cs = 9; goto _test_eof; _test_eof3: cs = 3; goto _test_eof; _test_eof4: cs = 4; goto _test_eof; _test_eof5: cs = 5; goto _test_eof; _test_eof6: cs = 6; goto _test_eof; _test_eof7: cs = 7; goto _test_eof; + _test_eof8: cs = 8; goto _test_eof; _test_eof: {} _out: {} } -#line 1030 "parser.rl" +#line 1093 "parser.rl" if (json->create_additions && RTEST(match_string = json->match_string)) { VALUE klass; @@ -2721,7 +2740,7 @@ static VALUE cParser_initialize(int argc, VALUE *argv, VALUE self) } -#line 2725 "parser.c" +#line 2744 "parser.c" enum {JSON_start = 1}; enum {JSON_first_final = 10}; enum {JSON_error = 0}; @@ -2729,7 +2748,7 @@ enum {JSON_error = 0}; enum {JSON_en_main = 1}; -#line 1196 "parser.rl" +#line 1259 "parser.rl" /* @@ -2758,16 +2777,16 @@ static VALUE cParser_parse(VALUE self) json->stack = &stack; -#line 2762 "parser.c" +#line 2781 "parser.c" { cs = JSON_start; } -#line 1224 "parser.rl" +#line 1287 "parser.rl" p = json->source; pe = p + json->len; -#line 2771 "parser.c" +#line 2790 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2801,7 +2820,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1188 "parser.rl" +#line 1251 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2811,7 +2830,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2815 "parser.c" +#line 2834 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -2900,7 +2919,7 @@ case 9: _out: {} } -#line 1227 "parser.rl" +#line 1290 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); @@ -2936,16 +2955,16 @@ static VALUE cParser_m_parse(VALUE klass, VALUE source, VALUE opts) json->stack = &stack; -#line 2940 "parser.c" +#line 2959 "parser.c" { cs = JSON_start; } -#line 1262 "parser.rl" +#line 1325 "parser.rl" p = json->source; pe = p + json->len; -#line 2949 "parser.c" +#line 2968 "parser.c" { if ( p == pe ) goto _test_eof; @@ -2979,7 +2998,7 @@ case 1: cs = 0; goto _out; tr2: -#line 1188 "parser.rl" +#line 1251 "parser.rl" { char *np = JSON_parse_value(json, p, pe, &result, 0); if (np == NULL) { p--; {p++; cs = 10; goto _out;} } else {p = (( np))-1;} @@ -2989,7 +3008,7 @@ cs = 0; if ( ++p == pe ) goto _test_eof10; case 10: -#line 2993 "parser.c" +#line 3012 "parser.c" switch( (*p) ) { case 13: goto st10; case 32: goto st10; @@ -3078,7 +3097,7 @@ case 9: _out: {} } -#line 1265 "parser.rl" +#line 1328 "parser.rl" if (json->stack_handle) { rvalue_stack_eagerly_release(json->stack_handle); diff --git a/ext/json/ext/parser/parser.rl b/ext/json/ext/parser/parser.rl index 6d4cc7a5b..eab60b919 100644 --- a/ext/json/ext/parser/parser.rl +++ b/ext/json/ext/parser/parser.rl @@ -15,6 +15,17 @@ static VALUE 
sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbo static int binary_encindex; static int utf8_encindex; +#ifdef HAVE_RB_CATEGORY_WARN +# define json_deprecated(message) rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, message) +#else +# define json_deprecated(message) rb_warn(message) +#endif + +static const char deprecated_create_additions_warning[] = + "JSON.load implicit support for `create_additions: true` is deprecated " + "and will be removed in 3.0, use JSON.unsafe_load or explicitly " + "pass `create_additions: true`"; + #ifndef HAVE_RB_GC_MARK_LOCATIONS // For TruffleRuby void rb_gc_mark_locations(const VALUE *start, const VALUE *end) @@ -381,6 +392,7 @@ typedef struct JSON_ParserStruct { VALUE decimal_class; VALUE match_string; FBuffer fbuffer; + int in_array; int max_nesting; bool allow_nan; bool allow_trailing_comma; @@ -409,8 +421,7 @@ static const rb_data_type_t JSON_Parser_type; static char *JSON_parse_string(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result); -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result); +static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result); static char *JSON_parse_array(JSON_Parser *json, char *p, char *pe, VALUE *result, int current_nesting); @@ -554,7 +565,7 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu VALUE klass = rb_funcall(mJSON, i_deep_const_get, 1, klassname); if (RTEST(rb_funcall(klass, i_json_creatable_p, 0))) { if (json->deprecated_create_additions) { - rb_warn("JSON.load implicit support for `create_additions: true` is deprecated and will be removed in 3.0, use JSON.unsafe_load or explicitly pass `create_additions: true`"); + json_deprecated(deprecated_create_additions_warning); } *result = rb_funcall(klass, i_json_create, 1, *result); } @@ -616,11 +627,7 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu raise_parse_error("unexpected token at '%s'", p); } } - np = JSON_parse_float(json, fpc, pe, result); - if (np != NULL) { - fexec np; - } - np = JSON_parse_integer(json, fpc, pe, result); + np = JSON_parse_number(json, fpc, pe, result); if (np != NULL) { fexec np; } @@ -629,7 +636,9 @@ static char *JSON_parse_object(JSON_Parser *json, char *p, char *pe, VALUE *resu action parse_array { char *np; + json->in_array++; np = JSON_parse_array(json, fpc, pe, result, current_nesting + 1); + json->in_array--; if (np == NULL) { fhold; fbreak; } else fexec np; } @@ -647,10 +656,10 @@ main := ignore* ( Vtrue @parse_true | VNaN @parse_nan | VInfinity @parse_infinity | - begin_number >parse_number | - begin_string >parse_string | - begin_array >parse_array | - begin_object >parse_object + begin_number @parse_number | + begin_string @parse_string | + begin_array @parse_array | + begin_object @parse_object ) ignore* %*exit; }%% @@ -683,24 +692,40 @@ static char *JSON_parse_value(JSON_Parser *json, char *p, char *pe, VALUE *resul main := '-'? ('0' | [1-9][0-9]*) (^[0-9]? 
@exit); }%% -static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *result) +#define MAX_FAST_INTEGER_SIZE 18 +static inline VALUE fast_parse_integer(char *p, char *pe) { - int cs = EVIL; + bool negative = false; + if (*p == '-') { + negative = true; + p++; + } - %% write init; - json->memo = p; - %% write exec; + long long memo = 0; + while (p < pe) { + memo *= 10; + memo += *p - '0'; + p++; + } - if (cs >= JSON_integer_first_final) { + if (negative) { + memo = -memo; + } + return LL2NUM(memo); +} + +static char *JSON_decode_integer(JSON_Parser *json, char *p, VALUE *result) +{ long len = p - json->memo; - fbuffer_clear(&json->fbuffer); - fbuffer_append(&json->fbuffer, json->memo, len); - fbuffer_append_char(&json->fbuffer, '\0'); - *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); + if (RB_LIKELY(len < MAX_FAST_INTEGER_SIZE)) { + *result = fast_parse_integer(json->memo, p); + } else { + fbuffer_clear(&json->fbuffer); + fbuffer_append(&json->fbuffer, json->memo, len); + fbuffer_append_char(&json->fbuffer, '\0'); + *result = rb_cstr2inum(FBUFFER_PTR(&json->fbuffer), 10); + } return p + 1; - } else { - return NULL; - } } %%{ @@ -710,22 +735,28 @@ static char *JSON_parse_integer(JSON_Parser *json, char *p, char *pe, VALUE *res write data; action exit { fhold; fbreak; } + action isFloat { is_float = true; } main := '-'? ( - (('0' | [1-9][0-9]*) '.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) - | (('0' | [1-9][0-9]*) ([Ee] [+\-]?[0-9]+)) - ) (^[0-9Ee.\-]? @exit ); + (('0' | [1-9][0-9]*) + ((('.' [0-9]+ ([Ee] [+\-]?[0-9]+)?) | + ([Ee] [+\-]?[0-9]+)) > isFloat)? + ) (^[0-9Ee.\-]? @exit )); }%% -static char *JSON_parse_float(JSON_Parser *json, char *p, char *pe, VALUE *result) +static char *JSON_parse_number(JSON_Parser *json, char *p, char *pe, VALUE *result) { int cs = EVIL; + bool is_float = false; %% write init; json->memo = p; %% write exec; if (cs >= JSON_float_first_final) { + if (!is_float) { + return JSON_decode_integer(json, p, result); + } VALUE mod = Qnil; ID method_id = 0; if (json->decimal_class) { @@ -865,6 +896,26 @@ static inline VALUE build_string(const char *start, const char *end, bool intern return result; } +static VALUE json_string_fastpath(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) +{ + size_t bufferSize = stringEnd - string; + + if (is_name && json->in_array) { + VALUE cached_key; + if (RB_UNLIKELY(symbolize)) { + cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); + } else { + cached_key = rstring_cache_fetch(&json->name_cache, string, bufferSize); + } + + if (RB_LIKELY(cached_key)) { + return cached_key; + } + } + + return build_string(string, stringEnd, intern, symbolize); +} + static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringEnd, bool is_name, bool intern, bool symbolize) { size_t bufferSize = stringEnd - string; @@ -872,7 +923,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE int unescape_len; char buf[4]; - if (is_name) { + if (is_name && json->in_array) { VALUE cached_key; if (RB_UNLIKELY(symbolize)) { cached_key = rsymbol_cache_fetch(&json->name_cache, string, bufferSize); @@ -886,7 +937,7 @@ static VALUE json_string_unescape(JSON_Parser *json, char *string, char *stringE } pe = memchr(p, '\\', bufferSize); - if (RB_LIKELY(pe == NULL)) { + if (RB_UNLIKELY(pe == NULL)) { return build_string(string, stringEnd, intern, symbolize); } @@ -992,19 +1043,31 @@ static VALUE json_string_unescape(JSON_Parser *json, char 
*string, char *stringE write data; - action parse_string { + action parse_complex_string { *result = json_string_unescape(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); - if (NIL_P(*result)) { - fhold; - fbreak; - } else { - fexec p + 1; - } + fexec p + 1; + fhold; + fbreak; } - action exit { fhold; fbreak; } + action parse_simple_string { + *result = json_string_fastpath(json, json->memo + 1, p, json->parsing_name, json->parsing_name || json-> freeze, json->parsing_name && json->symbolize_names); + fexec p + 1; + fhold; + fbreak; + } - main := '"' ((^([\"\\] | 0..0x1f) | '\\'[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | '\\'^([\"\\/bfnrtu]|0..0x1f))* %parse_string) '"' @exit; + double_quote = '"'; + escape = '\\'; + control = 0..0x1f; + simple = any - escape - double_quote - control; + + main := double_quote ( + (simple*)( + (double_quote) @parse_simple_string | + ((^([\"\\] | control) | escape[\"\\/bfnrt] | '\\u'[0-9a-fA-F]{4} | escape^([\"\\/bfnrtu]|0..0x1f))* double_quote) @parse_complex_string + ) + ); }%% static int diff --git a/java/src/json/ext/ByteListDirectOutputStream.java b/java/src/json/ext/ByteListDirectOutputStream.java new file mode 100644 index 000000000..178cf11c2 --- /dev/null +++ b/java/src/json/ext/ByteListDirectOutputStream.java @@ -0,0 +1,16 @@ +package json.ext; + +import org.jcodings.Encoding; +import org.jruby.util.ByteList; + +import java.io.ByteArrayOutputStream; + +public class ByteListDirectOutputStream extends ByteArrayOutputStream { + ByteListDirectOutputStream(int size) { + super(size); + } + + public ByteList toByteListDirect(Encoding encoding) { + return new ByteList(buf, 0, count, encoding, false); + } +} diff --git a/java/src/json/ext/ByteListTranscoder.java b/java/src/json/ext/ByteListTranscoder.java index 6f6ab66c1..0fedcabdd 100644 --- a/java/src/json/ext/ByteListTranscoder.java +++ b/java/src/json/ext/ByteListTranscoder.java @@ -9,6 +9,9 @@ import org.jruby.runtime.ThreadContext; import org.jruby.util.ByteList; +import java.io.IOException; +import java.io.OutputStream; + /** * A class specialized in transcoding a certain String format into another, * using UTF-8 ByteLists as both input and output. @@ -23,7 +26,7 @@ abstract class ByteListTranscoder { /** Position of the next character to read */ protected int pos; - private ByteList out; + private OutputStream out; /** * When a character that can be copied straight into the output is found, * its index is stored on this variable, and copying is delayed until @@ -37,11 +40,11 @@ protected ByteListTranscoder(ThreadContext context) { this.context = context; } - protected void init(ByteList src, ByteList out) { + protected void init(ByteList src, OutputStream out) { this.init(src, 0, src.length(), out); } - protected void init(ByteList src, int start, int end, ByteList out) { + protected void init(ByteList src, int start, int end, OutputStream out) { this.src = src; this.pos = start; this.charStart = start; @@ -142,19 +145,19 @@ protected void quoteStart() { * recently read character, or {@link #charStart} to quote * until the character before it. 
*/ - protected void quoteStop(int endPos) { + protected void quoteStop(int endPos) throws IOException { if (quoteStart != -1) { - out.append(src, quoteStart, endPos - quoteStart); + out.write(src.bytes(), quoteStart, endPos - quoteStart); quoteStart = -1; } } - protected void append(int b) { - out.append(b); + protected void append(int b) throws IOException { + out.write(b); } - protected void append(byte[] origin, int start, int length) { - out.append(origin, start, length); + protected void append(byte[] origin, int start, int length) throws IOException { + out.write(origin, start, length); } diff --git a/java/src/json/ext/Generator.java b/java/src/json/ext/Generator.java index f76dcb383..65c30ffa7 100644 --- a/java/src/json/ext/Generator.java +++ b/java/src/json/ext/Generator.java @@ -5,6 +5,8 @@ */ package json.ext; +import org.jcodings.Encoding; +import org.jcodings.specific.UTF8Encoding; import org.jruby.Ruby; import org.jruby.RubyArray; import org.jruby.RubyBasicObject; @@ -13,11 +15,18 @@ import org.jruby.RubyFixnum; import org.jruby.RubyFloat; import org.jruby.RubyHash; +import org.jruby.RubyIO; import org.jruby.RubyString; +import org.jruby.runtime.Helpers; import org.jruby.runtime.ThreadContext; import org.jruby.runtime.builtin.IRubyObject; import org.jruby.util.ByteList; import org.jruby.exceptions.RaiseException; +import org.jruby.util.IOOutputStream; + +import java.io.IOException; +import java.io.OutputStream; +import java.nio.charset.StandardCharsets; public final class Generator { private Generator() { @@ -48,12 +57,18 @@ private Generator() { * Encodes the given object as a JSON string, using the appropriate * handler if one is found or calling #to_json if not. */ - public static RubyString + public static IRubyObject generateJson(ThreadContext context, T object, - GeneratorState config) { + GeneratorState config, IRubyObject io) { Session session = new Session(context, config); Handler handler = getHandlerFor(context.runtime, object); - return handler.generateNew(session, object); + + if (io.isNil()) { + return handler.generateNew(session, object); + } + + handler.generateToBuffer(session, object, new IOOutputStream(io)); + return io; } /** @@ -171,17 +186,20 @@ int guessSize(Session session, T object) { } RubyString generateNew(Session session, T object) { - RubyString result; - ByteList buffer = new ByteList(guessSize(session, object)); - generate(session, object, buffer); - result = RubyString.newString(session.getRuntime(), buffer); - ThreadContext context = session.getContext(); - RuntimeInfo info = session.getInfo(); - result.force_encoding(context, info.utf8.get()); - return result; + ByteListDirectOutputStream buffer = new ByteListDirectOutputStream(guessSize(session, object)); + generateToBuffer(session, object, buffer); + return RubyString.newString(session.getRuntime(), buffer.toByteListDirect(UTF8Encoding.INSTANCE)); } - abstract void generate(Session session, T object, ByteList buffer); + void generateToBuffer(Session session, T object, OutputStream buffer) { + try { + generate(session, object, buffer); + } catch (IOException ioe) { + throw session.getRuntime().newIOErrorFromException(ioe); + } + } + + abstract void generate(Session session, T object, OutputStream buffer) throws IOException; } /** @@ -189,10 +207,10 @@ RubyString generateNew(Session session, T object) { */ private static class KeywordHandler extends Handler { - private final ByteList keyword; + private String keyword; private KeywordHandler(String keyword) { - this.keyword = new 
ByteList(ByteList.plain(keyword), false); + this.keyword = keyword; } @Override @@ -202,12 +220,12 @@ int guessSize(Session session, T object) { @Override RubyString generateNew(Session session, T object) { - return RubyString.newStringShared(session.getRuntime(), keyword); + return RubyString.newString(session.getRuntime(), keyword); } @Override - void generate(Session session, T object, ByteList buffer) { - buffer.append(keyword); + void generate(Session session, T object, OutputStream buffer) throws IOException { + buffer.write(keyword.getBytes(StandardCharsets.UTF_8)); } } @@ -217,39 +235,43 @@ void generate(Session session, T object, ByteList buffer) { static final Handler BIGNUM_HANDLER = new Handler() { @Override - void generate(Session session, RubyBignum object, ByteList buffer) { + void generate(Session session, RubyBignum object, OutputStream buffer) throws IOException { // JRUBY-4751: RubyBignum.to_s() returns generic object // representation (fixed in 1.5, but we maintain backwards // compatibility; call to_s(IRubyObject[]) then - buffer.append(((RubyString)object.to_s(IRubyObject.NULL_ARRAY)).getByteList()); + ByteList bytes = ((RubyString) object.to_s(IRubyObject.NULL_ARRAY)).getByteList(); + buffer.write(bytes.unsafeBytes(), bytes.begin(), bytes.length()); } }; static final Handler FIXNUM_HANDLER = new Handler() { @Override - void generate(Session session, RubyFixnum object, ByteList buffer) { - buffer.append(object.to_s().getByteList()); + void generate(Session session, RubyFixnum object, OutputStream buffer) throws IOException { + ByteList bytes = object.to_s().getByteList(); + buffer.write(bytes.unsafeBytes(), bytes.begin(), bytes.length()); } }; static final Handler FLOAT_HANDLER = new Handler() { @Override - void generate(Session session, RubyFloat object, ByteList buffer) { - double value = RubyFloat.num2dbl(object); - - if (Double.isInfinite(value) || Double.isNaN(value)) { + void generate(Session session, RubyFloat object, OutputStream buffer) throws IOException { + if (object.isInfinite() || object.isNaN()) { if (!session.getState().allowNaN()) { throw Utils.newException(session.getContext(), Utils.M_GENERATOR_ERROR, object + " not allowed in JSON"); } } - buffer.append(((RubyString)object.to_s()).getByteList()); + + double value = RubyFloat.num2dbl(object); + + buffer.write(Double.toString(value).getBytes(StandardCharsets.UTF_8)); } }; + private static final byte[] EMPTY_ARRAY_BYTES = "[]".getBytes(); static final Handler ARRAY_HANDLER = new Handler() { @Override @@ -264,14 +286,14 @@ int guessSize(Session session, RubyArray object) { } @Override - void generate(Session session, RubyArray object, ByteList buffer) { + void generate(Session session, RubyArray object, OutputStream buffer) throws IOException { ThreadContext context = session.getContext(); Ruby runtime = context.getRuntime(); GeneratorState state = session.getState(); int depth = state.increaseDepth(); if (object.isEmpty()) { - buffer.append("[]".getBytes()); + buffer.write(EMPTY_ARRAY_BYTES); state.decreaseDepth(); return; } @@ -287,8 +309,8 @@ void generate(Session session, RubyArray object, ByteList buffer) { session.infectBy(object); - buffer.append((byte)'['); - buffer.append(arrayNl); + buffer.write((byte)'['); + buffer.write(arrayNl.bytes()); boolean firstItem = true; for (int i = 0, t = object.getLength(); i < t; i++) { IRubyObject element = object.eltInternal(i); @@ -296,23 +318,24 @@ void generate(Session session, RubyArray object, ByteList buffer) { if (firstItem) { firstItem = false; } else 
{ - buffer.append(delim); + buffer.write(delim); } - buffer.append(shift); + buffer.write(shift); Handler handler = (Handler) getHandlerFor(runtime, element); handler.generate(session, element, buffer); } state.decreaseDepth(); if (arrayNl.length() != 0) { - buffer.append(arrayNl); - buffer.append(shift, 0, state.getDepth() * indentUnit.length()); + buffer.write(arrayNl.bytes()); + buffer.write(shift, 0, state.getDepth() * indentUnit.length()); } - buffer.append((byte)']'); + buffer.write((byte)']'); } }; + private static final byte[] EMPTY_HASH_BYTES = "{}".getBytes(); static final Handler HASH_HANDLER = new Handler() { @Override @@ -328,14 +351,14 @@ int guessSize(Session session, RubyHash object) { @Override void generate(final Session session, RubyHash object, - final ByteList buffer) { + final OutputStream buffer) throws IOException { ThreadContext context = session.getContext(); final Ruby runtime = context.getRuntime(); final GeneratorState state = session.getState(); final int depth = state.increaseDepth(); if (object.isEmpty()) { - buffer.append("{}".getBytes()); + buffer.write(EMPTY_HASH_BYTES); state.decreaseDepth(); return; } @@ -345,46 +368,50 @@ void generate(final Session session, RubyHash object, final ByteList spaceBefore = state.getSpaceBefore(); final ByteList space = state.getSpace(); - buffer.append((byte)'{'); - buffer.append(objectNl); + buffer.write((byte)'{'); + buffer.write(objectNl.bytes()); final boolean[] firstPair = new boolean[]{true}; object.visitAll(new RubyHash.Visitor() { @Override public void visit(IRubyObject key, IRubyObject value) { - if (firstPair[0]) { - firstPair[0] = false; - } else { - buffer.append((byte)','); - buffer.append(objectNl); + try { + if (firstPair[0]) { + firstPair[0] = false; + } else { + buffer.write((byte) ','); + buffer.write(objectNl.bytes()); + } + if (objectNl.length() != 0) buffer.write(indent); + + IRubyObject keyStr = key.callMethod(context, "to_s"); + if (keyStr.getMetaClass() == runtime.getString()) { + STRING_HANDLER.generate(session, (RubyString) keyStr, buffer); + } else { + Utils.ensureString(keyStr); + Handler keyHandler = (Handler) getHandlerFor(runtime, keyStr); + keyHandler.generate(session, keyStr, buffer); + } + session.infectBy(key); + + buffer.write(spaceBefore.bytes()); + buffer.write((byte) ':'); + buffer.write(space.bytes()); + + Handler valueHandler = (Handler) getHandlerFor(runtime, value); + valueHandler.generate(session, value, buffer); + session.infectBy(value); + } catch (Throwable t) { + Helpers.throwException(t); } - if (objectNl.length() != 0) buffer.append(indent); - - IRubyObject keyStr = key.callMethod(context, "to_s"); - if (keyStr.getMetaClass() == runtime.getString()) { - STRING_HANDLER.generate(session, (RubyString)keyStr, buffer); - } else { - Utils.ensureString(keyStr); - Handler keyHandler = (Handler) getHandlerFor(runtime, keyStr); - keyHandler.generate(session, keyStr, buffer); - } - session.infectBy(key); - - buffer.append(spaceBefore); - buffer.append((byte)':'); - buffer.append(space); - - Handler valueHandler = (Handler) getHandlerFor(runtime, value); - valueHandler.generate(session, value, buffer); - session.infectBy(value); } }); state.decreaseDepth(); if (!firstPair[0] && objectNl.length() != 0) { - buffer.append(objectNl); + buffer.write(objectNl.bytes()); } - buffer.append(Utils.repeat(state.getIndent(), state.getDepth())); - buffer.append((byte)'}'); + buffer.write(Utils.repeat(state.getIndent(), state.getDepth())); + buffer.write((byte)'}'); } }; @@ -399,7 +426,7 @@ int 
guessSize(Session session, RubyString object) { } @Override - void generate(Session session, RubyString object, ByteList buffer) { + void generate(Session session, RubyString object, OutputStream buffer) throws IOException { RuntimeInfo info = session.getInfo(); RubyString src; @@ -439,7 +466,7 @@ RubyString generateNew(Session session, IRubyObject object) { } @Override - void generate(Session session, IRubyObject object, ByteList buffer) { + void generate(Session session, IRubyObject object, OutputStream buffer) throws IOException { RubyString str = object.asString(); STRING_HANDLER.generate(session, str, buffer); } @@ -468,9 +495,10 @@ RubyString generateNew(Session session, IRubyObject object) { } @Override - void generate(Session session, IRubyObject object, ByteList buffer) { + void generate(Session session, IRubyObject object, OutputStream buffer) throws IOException { RubyString result = generateNew(session, object); - buffer.append(result.getByteList()); + ByteList bytes = result.getByteList(); + buffer.write(bytes.unsafeBytes(), bytes.begin(), bytes.length()); } }; } diff --git a/java/src/json/ext/GeneratorService.java b/java/src/json/ext/GeneratorService.java index e665ad144..1500c4126 100644 --- a/java/src/json/ext/GeneratorService.java +++ b/java/src/json/ext/GeneratorService.java @@ -37,6 +37,8 @@ public boolean basicLoad(Ruby runtime) throws IOException { generatorModule.defineModuleUnder("GeneratorMethods"); GeneratorMethods.populate(info, generatorMethods); + runtime.getLoadService().require("json/ext/generator/state"); + return true; } } diff --git a/java/src/json/ext/GeneratorState.java b/java/src/json/ext/GeneratorState.java index 1600b04aa..0d8a3617d 100644 --- a/java/src/json/ext/GeneratorState.java +++ b/java/src/json/ext/GeneratorState.java @@ -139,8 +139,8 @@ public static IRubyObject from_state(ThreadContext context, } @JRubyMethod(meta=true) - public static IRubyObject generate(ThreadContext context, IRubyObject klass, IRubyObject obj, IRubyObject opts) { - return fromState(context, opts).generate(context, obj); + public static IRubyObject generate(ThreadContext context, IRubyObject klass, IRubyObject obj, IRubyObject opts, IRubyObject io) { + return fromState(context, opts)._generate(context, obj, io); } static GeneratorState fromState(ThreadContext context, IRubyObject opts) { @@ -196,7 +196,7 @@ static GeneratorState fromState(ThreadContext context, RuntimeInfo info, */ @JRubyMethod(optional=1, visibility=Visibility.PRIVATE) public IRubyObject initialize(ThreadContext context, IRubyObject[] args) { - configure(context, args.length > 0 ? args[0] : null); + _configure(context, args.length > 0 ? args[0] : null); return this; } @@ -228,17 +228,23 @@ public IRubyObject initialize_copy(ThreadContext context, IRubyObject vOrig) { * the result. If no valid JSON document can be created this method raises * a GeneratorError exception. 
*/ - @JRubyMethod - public IRubyObject generate(ThreadContext context, IRubyObject obj) { - RubyString result = Generator.generateJson(context, obj, this); + @JRubyMethod(visibility = Visibility.PRIVATE) + public IRubyObject _generate(ThreadContext context, IRubyObject obj, IRubyObject io) { + IRubyObject result = Generator.generateJson(context, obj, this, io); RuntimeInfo info = RuntimeInfo.forRuntime(context.getRuntime()); - if (result.getEncoding() != UTF8Encoding.INSTANCE) { - if (result.isFrozen()) { - result = result.strDup(context.getRuntime()); + if (!(result instanceof RubyString)) { + return result; + } + + RubyString resultString = result.convertToString(); + if (resultString.getEncoding() != UTF8Encoding.INSTANCE) { + if (resultString.isFrozen()) { + resultString = resultString.strDup(context.getRuntime()); } - result.force_encoding(context, info.utf8.get()); + resultString.force_encoding(context, info.utf8.get()); } - return result; + + return resultString; } private static boolean matchClosingBrace(ByteList bl, int pos, int len, @@ -405,7 +411,7 @@ public boolean strict() { return strict; } - @JRubyMethod(name="strict") + @JRubyMethod(name={"strict","strict?"}) public RubyBoolean strict_get(ThreadContext context) { return context.getRuntime().newBoolean(strict); } @@ -478,8 +484,8 @@ private ByteList prepareByteList(ThreadContext context, IRubyObject value) { * @param vOpts The options hash * @return The receiver */ - @JRubyMethod(alias = "merge") - public IRubyObject configure(ThreadContext context, IRubyObject vOpts) { + @JRubyMethod(visibility=Visibility.PRIVATE) + public IRubyObject _configure(ThreadContext context, IRubyObject vOpts) { OptionsReader opts = new OptionsReader(context, vOpts); ByteList indent = opts.getString("indent"); diff --git a/java/src/json/ext/StringDecoder.java b/java/src/json/ext/StringDecoder.java index 76cf18375..f4877e935 100644 --- a/java/src/json/ext/StringDecoder.java +++ b/java/src/json/ext/StringDecoder.java @@ -9,6 +9,8 @@ import org.jruby.runtime.ThreadContext; import org.jruby.util.ByteList; +import java.io.IOException; + /** * A decoder that reads a JSON-encoded string from the given sources and * returns its decoded form on a new ByteList. 
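On the Ruby-facing side, `GeneratorState#generate` becomes a private `_generate(obj, io)` primitive, wrapped by the small Ruby `generate(obj, io = nil)` method added further down in lib/json/ext/generator/state.rb, so callers can pass a target IO and have the document written straight into it; `strict` also gains a `strict?` predicate alias. A usage sketch, assuming `JSON::State` resolves to this generator state class as usual:

```ruby
require "json"

state = JSON::State.new(strict: true)
state.strict?                        # => true (predicate alias added above)

payload = { "id" => 123, "name" => "json" }
state.generate(payload)              # => '{"id":123,"name":"json"}'

# With an IO as second argument the JSON is written into it and the IO
# itself is returned, so no large intermediate String is built.
File.open("payload.json", "w") do |io|
  state.generate(payload, io)        # => io
end
```

When an IO is given, `_generate` returns it untouched rather than a String, which is why the encoding fix-up above only runs for String results.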
Escaped Unicode characters @@ -29,17 +31,20 @@ final class StringDecoder extends ByteListTranscoder { } ByteList decode(ByteList src, int start, int end) { - ByteList out = new ByteList(end - start); - out.setEncoding(src.getEncoding()); - init(src, start, end, out); - while (hasNext()) { - handleChar(readUtf8Char()); + try { + ByteListDirectOutputStream out = new ByteListDirectOutputStream(end - start); + init(src, start, end, out); + while (hasNext()) { + handleChar(readUtf8Char()); + } + quoteStop(pos); + return out.toByteListDirect(src.getEncoding()); + } catch (IOException e) { + throw context.runtime.newIOErrorFromException(e); } - quoteStop(pos); - return out; } - private void handleChar(int c) { + private void handleChar(int c) throws IOException { if (c == '\\') { quoteStop(charStart); handleEscapeSequence(); @@ -48,7 +53,7 @@ private void handleChar(int c) { } } - private void handleEscapeSequence() { + private void handleEscapeSequence() throws IOException { ensureMin(1); switch (readUtf8Char()) { case 'b': @@ -83,7 +88,7 @@ private void handleEscapeSequence() { } } - private void handleLowSurrogate(char highSurrogate) { + private void handleLowSurrogate(char highSurrogate) throws IOException { surrogatePairStart = charStart; ensureMin(1); int lowSurrogate = readUtf8Char(); @@ -103,7 +108,7 @@ private void handleLowSurrogate(char highSurrogate) { } } - private void writeUtf8Char(int codePoint) { + private void writeUtf8Char(int codePoint) throws IOException { if (codePoint < 0x80) { append(codePoint); } else if (codePoint < 0x800) { diff --git a/java/src/json/ext/StringEncoder.java b/java/src/json/ext/StringEncoder.java index 290aa2495..b1e7096e7 100644 --- a/java/src/json/ext/StringEncoder.java +++ b/java/src/json/ext/StringEncoder.java @@ -9,6 +9,9 @@ import org.jruby.runtime.ThreadContext; import org.jruby.util.ByteList; +import java.io.IOException; +import java.io.OutputStream; + /** * An encoder that reads from the given source and outputs its representation * to another ByteList. 
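The decoder/encoder pair is reworked the same way: both now write through a `java.io.OutputStream` (a `ByteListDirectOutputStream` when building a Ruby String, or an `IOOutputStream` wrapping the target IO), and any `IOException` is re-raised on the Ruby side as an `IOError`. The only user-visible effect is that write failures surface while generating; a minimal sketch, assuming an IO that was closed before the dump:

```ruby
require "json"

io = File.open("out.json", "w")
io.close

begin
  JSON.dump({ "a" => 1 }, io)   # generation now writes directly to the IO
rescue IOError => e
  puts e.message                # e.g. "closed stream"
end
```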
The source string is fully checked for UTF-8 validity, @@ -43,7 +46,7 @@ final class StringEncoder extends ByteListTranscoder { this.scriptSafe = scriptSafe; } - void encode(ByteList src, ByteList out) { + void encode(ByteList src, OutputStream out) throws IOException { init(src, out); append('"'); while (hasNext()) { @@ -53,7 +56,7 @@ void encode(ByteList src, ByteList out) { append('"'); } - private void handleChar(int c) { + private void handleChar(int c) throws IOException { switch (c) { case '"': case '\\': @@ -97,13 +100,13 @@ private void handleChar(int c) { } } - private void escapeChar(char c) { + private void escapeChar(char c) throws IOException { quoteStop(charStart); aux[ESCAPE_CHAR_OFFSET + 1] = (byte)c; append(aux, ESCAPE_CHAR_OFFSET, 2); } - private void escapeUtf8Char(int codePoint) { + private void escapeUtf8Char(int codePoint) throws IOException { int numChars = Character.toChars(codePoint, utf16, 0); escapeCodeUnit(utf16[0], ESCAPE_UNI1_OFFSET + 2); if (numChars > 1) escapeCodeUnit(utf16[1], ESCAPE_UNI2_OFFSET + 2); diff --git a/json.gemspec b/json.gemspec index c6aa82d36..321a85fcf 100644 --- a/json.gemspec +++ b/json.gemspec @@ -50,6 +50,7 @@ spec = Gem::Specification.new do |s| if java_ext s.platform = 'java' + s.files += Dir["lib/json/ext/**/*.jar"] else s.extensions = Dir["ext/json/**/extconf.rb"] s.files += Dir["ext/json/**/*.{c,h,rl}"] diff --git a/lib/json/common.rb b/lib/json/common.rb index 2269896ba..a88a3fffa 100644 --- a/lib/json/common.rb +++ b/lib/json/common.rb @@ -1,4 +1,5 @@ -#frozen_string_literal: true +# frozen_string_literal: true + require 'json/version' module JSON @@ -25,7 +26,7 @@ def [](object, opts = {}) elsif object.respond_to?(:to_str) str = object.to_str if str.is_a?(String) - return JSON.parse(object.to_str, opts) + return JSON.parse(str, opts) end end @@ -230,8 +231,8 @@ def parse!(source, opts = {}) # parse(File.read(path), opts) # # See method #parse. - def load_file(filespec, opts = {}) - parse(File.read(filespec), opts) + def load_file(filespec, opts = nil) + parse(File.read(filespec, encoding: Encoding::UTF_8), opts) end # :call-seq: @@ -242,7 +243,7 @@ def load_file(filespec, opts = {}) # # See method #parse! def load_file!(filespec, opts = {}) - parse!(File.read(filespec), opts) + parse!(File.read(filespec, encoding: Encoding::UTF_8), opts) end # :call-seq: @@ -285,7 +286,7 @@ def generate(obj, opts = nil) if State === opts opts.generate(obj) else - State.generate(obj, opts) + State.generate(obj, opts, nil) end end @@ -410,6 +411,10 @@ class << self # # Returns the Ruby objects created by parsing the given +source+. # + # BEWARE: This method is meant to serialise data from trusted user input, + # like from your own database server or clients under your control, it could + # be dangerous to allow untrusted users to pass JSON sources into it. + # # - Argument +source+ must be, or be convertible to, a \String: # - If +source+ responds to instance method +to_str+, # source.to_str becomes the source. @@ -424,9 +429,6 @@ class << self # - Argument +proc+, if given, must be a \Proc that accepts one argument. # It will be called recursively with each result (depth-first order). # See details below. - # BEWARE: This method is meant to serialise data from trusted user input, - # like from your own database server or clients under your control, it could - # be dangerous to allow untrusted users to pass JSON sources into it. # - Argument +opts+, if given, contains a \Hash of options for the parsing. 
# See {Parsing Options}[#module-JSON-label-Parsing+Options]. # The default options can be changed via method JSON.unsafe_load_default_options=. @@ -563,6 +565,16 @@ def unsafe_load(source, proc = nil, options = nil) # # Returns the Ruby objects created by parsing the given +source+. # + # BEWARE: This method is meant to serialise data from trusted user input, + # like from your own database server or clients under your control, it could + # be dangerous to allow untrusted users to pass JSON sources into it. + # If you must use it, use JSON.unsafe_load instead to make it clear. + # + # Since JSON version 2.8.0, `load` emits a deprecation warning when a + # non native type is deserialized, without `create_additions` being explicitly + # enabled, and in JSON version 3.0, `load` will have `create_additions` disabled + # by default. + # # - Argument +source+ must be, or be convertible to, a \String: # - If +source+ responds to instance method +to_str+, # source.to_str becomes the source. @@ -577,10 +589,6 @@ def unsafe_load(source, proc = nil, options = nil) # - Argument +proc+, if given, must be a \Proc that accepts one argument. # It will be called recursively with each result (depth-first order). # See details below. - # BEWARE: This method is meant to serialise data from trusted user input, - # like from your own database server or clients under your control, it could - # be dangerous to allow untrusted users to pass JSON sources into it. - # If you must use it, use JSON.unsafe_load instead to make it clear. # - Argument +opts+, if given, contains a \Hash of options for the parsing. # See {Parsing Options}[#module-JSON-label-Parsing+Options]. # The default options can be changed via method JSON.load_default_options=. @@ -793,18 +801,15 @@ def dump(obj, anIO = nil, limit = nil, kwargs = nil) opts = opts.merge(:max_nesting => limit) if limit opts = merge_dump_options(opts, **kwargs) if kwargs - result = begin - generate(obj, opts) + begin + if State === opts + opts.generate(obj, anIO) + else + State.generate(obj, opts, anIO) + end rescue JSON::NestingError raise ArgumentError, "exceed depth limit" end - - if anIO.nil? - result - else - anIO.write result - anIO - end end # Encodes string using String.encode. diff --git a/lib/json/ext/generator/state.rb b/lib/json/ext/generator/state.rb index 6cd9496e6..1e0d5245b 100644 --- a/lib/json/ext/generator/state.rb +++ b/lib/json/ext/generator/state.rb @@ -47,6 +47,17 @@ def configure(opts) alias_method :merge, :configure + # call-seq: + # generate(obj) -> String + # generate(obj, anIO) -> anIO + # + # Generates a valid JSON document from object +obj+ and returns the + # result. If no valid JSON document can be created this method raises a + # GeneratorError exception. 
+ def generate(obj, io = nil) + _generate(obj, io) + end + # call-seq: to_h # # Returns the configuration instance variables as a hash, that can be diff --git a/lib/json/version.rb b/lib/json/version.rb index ee1312c7d..d5cfb5491 100644 --- a/lib/json/version.rb +++ b/lib/json/version.rb @@ -1,5 +1,5 @@ # frozen_string_literal: true module JSON - VERSION = '2.8.0.alpha1' + VERSION = '2.8.2' end diff --git a/test/json/json_addition_test.rb b/test/json/json_addition_test.rb index d78ae55c3..1eb269c2f 100644 --- a/test/json/json_addition_test.rb +++ b/test/json/json_addition_test.rb @@ -163,7 +163,7 @@ def test_core end def test_deprecated_load_create_additions - assert_warning(/use JSON\.unsafe_load/) do + assert_deprecated_warning(/use JSON\.unsafe_load/) do JSON.load(JSON.dump(Time.now)) end end diff --git a/test/json/json_common_interface_test.rb b/test/json/json_common_interface_test.rb index 6165cc041..a5d62337e 100644 --- a/test/json/json_common_interface_test.rb +++ b/test/json/json_common_interface_test.rb @@ -1,4 +1,5 @@ # frozen_string_literal: true + require_relative 'test_helper' require 'stringio' require 'tempfile' @@ -189,8 +190,29 @@ def test_load_file_with_option! test_load_file_with_option_shared(:load_file!) end + def test_load_file_with_bad_default_external_encoding + data = { "key" => "€" } + temp_file_containing(JSON.dump(data)) do |path| + loaded_data = with_external_encoding(Encoding::US_ASCII) do + JSON.load_file(path) + end + assert_equal data, loaded_data + end + end + private + def with_external_encoding(encoding) + verbose = $VERBOSE + $VERBOSE = nil + previous_encoding = Encoding.default_external + Encoding.default_external = encoding + yield + ensure + Encoding.default_external = previous_encoding + $VERBOSE = verbose + end + def test_load_shared(method_name) temp_file_containing(@json) do |filespec| assert_equal JSON.public_send(method_name, filespec), @hash
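Two behaviour changes from the Ruby-level diff above are worth calling out together: `JSON.load_file` (and `load_file!`) now open the file as UTF-8 regardless of `Encoding.default_external`, which is exactly what the new `test_load_file_with_bad_default_external_encoding` exercises, and `JSON.dump` with an IO argument now streams through `State.generate(obj, opts, anIO)` instead of building the whole String and writing it afterwards. A combined usage sketch (file names are illustrative):

```ruby
require "json"
require "tempfile"

data = { "key" => "€" }

Tempfile.create("doc") do |f|
  f.write(JSON.dump(data))
  f.flush

  # With a non-UTF-8 default external encoding, load_file still reads the
  # file as UTF-8, so the multi-byte value round-trips.
  verbose, $VERBOSE = $VERBOSE, nil
  previous = Encoding.default_external
  Encoding.default_external = Encoding::US_ASCII
  begin
    p JSON.load_file(f.path) == data   # => true
  ensure
    Encoding.default_external = previous
    $VERBOSE = verbose
  end
end

# Passing an IO to JSON.dump writes the document into it and returns the IO,
# instead of generating the full String first and calling #write on it.
File.open("out.json", "w") { |io| JSON.dump(data, io) }
```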