Skip to content

Commit d9e9a66

Browse files
byroothsbt
authored andcommitted
JSON.generate: warn or raise on duplicated key
Because both strings and symbols keys are serialized the same, it always has been possible to generate documents with duplicated keys: ```ruby >> puts JSON.generate({ foo: 1, "foo" => 2 }) {"foo":1,"foo":2} ``` This is pretty much always a mistake and can cause various issues because it's not guaranteed how various JSON parsers will handle this. Until now I didn't think it was possible to catch such case without tanking performance, hence why I only made the parser more strict. But I finally found a way to check for duplicated keys cheaply enough.
1 parent 0e0f0df commit d9e9a66

File tree

6 files changed

+138
-7
lines changed

6 files changed

+138
-7
lines changed

ext/json/fbuffer/fbuffer.h

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -24,6 +24,14 @@ typedef unsigned char _Bool;
2424
#endif
2525
#endif
2626

27+
#ifndef NOINLINE
28+
#if defined(__has_attribute) && __has_attribute(noinline)
29+
#define NOINLINE() __attribute__((noinline))
30+
#else
31+
#define NOINLINE()
32+
#endif
33+
#endif
34+
2735
#ifndef RB_UNLIKELY
2836
#define RB_UNLIKELY(expr) expr
2937
#endif

ext/json/generator/generator.c

Lines changed: 67 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -9,6 +9,12 @@
99

1010
/* ruby api and some helpers */
1111

12+
enum duplicate_key_action {
13+
JSON_DEPRECATED = 0,
14+
JSON_IGNORE,
15+
JSON_RAISE,
16+
};
17+
1218
typedef struct JSON_Generator_StateStruct {
1319
VALUE indent;
1420
VALUE space;
@@ -21,6 +27,8 @@ typedef struct JSON_Generator_StateStruct {
2127
long depth;
2228
long buffer_initial_length;
2329

30+
enum duplicate_key_action on_duplicate_key;
31+
2432
bool allow_nan;
2533
bool ascii_only;
2634
bool script_safe;
@@ -34,7 +42,7 @@ typedef struct JSON_Generator_StateStruct {
3442
static VALUE mJSON, cState, cFragment, eGeneratorError, eNestingError, Encoding_UTF_8;
3543

3644
static ID i_to_s, i_to_json, i_new, i_pack, i_unpack, i_create_id, i_extend, i_encode;
37-
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan,
45+
static VALUE sym_indent, sym_space, sym_space_before, sym_object_nl, sym_array_nl, sym_max_nesting, sym_allow_nan, sym_allow_duplicate_key,
3846
sym_ascii_only, sym_depth, sym_buffer_initial_length, sym_script_safe, sym_escape_slash, sym_strict, sym_as_json;
3947

4048

@@ -987,8 +995,11 @@ static inline VALUE vstate_get(struct generate_json_data *data)
987995
}
988996

989997
struct hash_foreach_arg {
998+
VALUE hash;
990999
struct generate_json_data *data;
991-
int iter;
1000+
int first_key_type;
1001+
bool first;
1002+
bool mixed_keys_encountered;
9921003
};
9931004

9941005
static VALUE
@@ -1006,6 +1017,22 @@ convert_string_subclass(VALUE key)
10061017
return key_to_s;
10071018
}
10081019

1020+
NOINLINE()
1021+
static void
1022+
json_inspect_hash_with_mixed_keys(struct hash_foreach_arg *arg)
1023+
{
1024+
if (arg->mixed_keys_encountered) {
1025+
return;
1026+
}
1027+
arg->mixed_keys_encountered = true;
1028+
1029+
JSON_Generator_State *state = arg->data->state;
1030+
if (state->on_duplicate_key != JSON_IGNORE) {
1031+
VALUE do_raise = state->on_duplicate_key == JSON_RAISE ? Qtrue : Qfalse;
1032+
rb_funcall(mJSON, rb_intern("on_mixed_keys_hash"), 2, arg->hash, do_raise);
1033+
}
1034+
}
1035+
10091036
static int
10101037
json_object_i(VALUE key, VALUE val, VALUE _arg)
10111038
{
@@ -1016,8 +1043,16 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
10161043
JSON_Generator_State *state = data->state;
10171044

10181045
long depth = state->depth;
1046+
int key_type = rb_type(key);
1047+
1048+
if (arg->first) {
1049+
arg->first = false;
1050+
arg->first_key_type = key_type;
1051+
}
1052+
else {
1053+
fbuffer_append_char(buffer, ',');
1054+
}
10191055

1020-
if (arg->iter > 0) fbuffer_append_char(buffer, ',');
10211056
if (RB_UNLIKELY(data->state->object_nl)) {
10221057
fbuffer_append_str(buffer, data->state->object_nl);
10231058
}
@@ -1029,21 +1064,30 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
10291064
bool as_json_called = false;
10301065

10311066
start:
1032-
switch (rb_type(key)) {
1067+
switch (key_type) {
10331068
case T_STRING:
1069+
if (RB_UNLIKELY(arg->first_key_type != T_STRING)) {
1070+
json_inspect_hash_with_mixed_keys(arg);
1071+
}
1072+
10341073
if (RB_LIKELY(RBASIC_CLASS(key) == rb_cString)) {
10351074
key_to_s = key;
10361075
} else {
10371076
key_to_s = convert_string_subclass(key);
10381077
}
10391078
break;
10401079
case T_SYMBOL:
1080+
if (RB_UNLIKELY(arg->first_key_type != T_SYMBOL)) {
1081+
json_inspect_hash_with_mixed_keys(arg);
1082+
}
1083+
10411084
key_to_s = rb_sym2str(key);
10421085
break;
10431086
default:
10441087
if (data->state->strict) {
10451088
if (RTEST(data->state->as_json) && !as_json_called) {
10461089
key = rb_proc_call_with_block(data->state->as_json, 1, &key, Qnil);
1090+
key_type = rb_type(key);
10471091
as_json_called = true;
10481092
goto start;
10491093
} else {
@@ -1064,7 +1108,6 @@ json_object_i(VALUE key, VALUE val, VALUE _arg)
10641108
if (RB_UNLIKELY(state->space)) fbuffer_append_str(buffer, data->state->space);
10651109
generate_json(buffer, data, val);
10661110

1067-
arg->iter++;
10681111
return ST_CONTINUE;
10691112
}
10701113

@@ -1091,8 +1134,9 @@ static void generate_json_object(FBuffer *buffer, struct generate_json_data *dat
10911134
fbuffer_append_char(buffer, '{');
10921135

10931136
struct hash_foreach_arg arg = {
1137+
.hash = obj,
10941138
.data = data,
1095-
.iter = 0,
1139+
.first = true,
10961140
};
10971141
rb_hash_foreach(obj, json_object_i, (VALUE)&arg);
10981142

@@ -1794,6 +1838,19 @@ static VALUE cState_ascii_only_set(VALUE self, VALUE enable)
17941838
return Qnil;
17951839
}
17961840

1841+
static VALUE cState_allow_duplicate_key_p(VALUE self)
1842+
{
1843+
GET_STATE(self);
1844+
switch (state->on_duplicate_key) {
1845+
case JSON_IGNORE:
1846+
return Qtrue;
1847+
case JSON_DEPRECATED:
1848+
return Qnil;
1849+
case JSON_RAISE:
1850+
return Qfalse;
1851+
}
1852+
}
1853+
17971854
/*
17981855
* call-seq: depth
17991856
*
@@ -1883,6 +1940,7 @@ static int configure_state_i(VALUE key, VALUE val, VALUE _arg)
18831940
else if (key == sym_script_safe) { state->script_safe = RTEST(val); }
18841941
else if (key == sym_escape_slash) { state->script_safe = RTEST(val); }
18851942
else if (key == sym_strict) { state->strict = RTEST(val); }
1943+
else if (key == sym_allow_duplicate_key) { state->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
18861944
else if (key == sym_as_json) {
18871945
VALUE proc = RTEST(val) ? rb_convert_type(val, T_DATA, "Proc", "to_proc") : Qfalse;
18881946
state_write_value(data, &state->as_json, proc);
@@ -2008,6 +2066,8 @@ void Init_generator(void)
20082066
rb_define_method(cState, "generate", cState_generate, -1);
20092067
rb_define_alias(cState, "generate_new", "generate"); // :nodoc:
20102068

2069+
rb_define_private_method(cState, "allow_duplicate_key?", cState_allow_duplicate_key_p, 0);
2070+
20112071
rb_define_singleton_method(cState, "generate", cState_m_generate, 3);
20122072

20132073
VALUE mGeneratorMethods = rb_define_module_under(mGenerator, "GeneratorMethods");
@@ -2072,6 +2132,7 @@ void Init_generator(void)
20722132
sym_escape_slash = ID2SYM(rb_intern("escape_slash"));
20732133
sym_strict = ID2SYM(rb_intern("strict"));
20742134
sym_as_json = ID2SYM(rb_intern("as_json"));
2135+
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
20752136

20762137
usascii_encindex = rb_usascii_encindex();
20772138
utf8_encindex = rb_utf8_encindex();

ext/json/lib/json/common.rb

Lines changed: 19 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -186,6 +186,25 @@ def generator=(generator) # :nodoc:
186186

187187
private
188188

189+
# Called from the extension when a hash has both string and symbol keys
190+
def on_mixed_keys_hash(hash, do_raise)
191+
set = {}
192+
hash.each_key do |key|
193+
key_str = key.to_s
194+
195+
if set[key_str]
196+
message = "detected duplicate key #{key_str.inspect} in #{hash.inspect}"
197+
if do_raise
198+
raise GeneratorError, message
199+
else
200+
deprecation_warning("#{message}.\nThis will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`")
201+
end
202+
else
203+
set[key_str] = true
204+
end
205+
end
206+
end
207+
189208
def deprecated_singleton_attr_accessor(*attrs)
190209
args = RUBY_VERSION >= "3.0" ? ", category: :deprecated" : ""
191210
attrs.each do |attr|

ext/json/lib/json/ext/generator/state.rb

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,11 @@ def to_h
5656
buffer_initial_length: buffer_initial_length,
5757
}
5858

59+
allow_duplicate_key = allow_duplicate_key?
60+
unless allow_duplicate_key.nil?
61+
result[:allow_duplicate_key] = allow_duplicate_key
62+
end
63+
5964
instance_variables.each do |iv|
6065
iv = iv.to_s[1..-1]
6166
result[iv.to_sym] = self[iv]

ext/json/parser/parser.c

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1314,7 +1314,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
13141314
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
13151315
else if (key == sym_freeze) { config->freeze = RTEST(val); }
13161316
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1317-
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
1317+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
13181318
else if (key == sym_decimal_class) {
13191319
if (RTEST(val)) {
13201320
if (rb_respond_to(val, i_try_convert)) {

test/json/json_generator_test.rb

Lines changed: 38 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,24 @@ def test_state_defaults
234234
:space => "",
235235
:space_before => "",
236236
}.sort_by { |n,| n.to_s }, state.to_h.sort_by { |n,| n.to_s })
237+
238+
state = JSON::State.new(allow_duplicate_key: true)
239+
assert_equal({
240+
:allow_duplicate_key => true,
241+
:allow_nan => false,
242+
:array_nl => "",
243+
:as_json => false,
244+
:ascii_only => false,
245+
:buffer_initial_length => 1024,
246+
:depth => 0,
247+
:script_safe => false,
248+
:strict => false,
249+
:indent => "",
250+
:max_nesting => 100,
251+
:object_nl => "",
252+
:space => "",
253+
:space_before => "",
254+
}.sort_by { |n,| n.to_s }, state.to_h.sort_by { |n,| n.to_s })
237255
end
238256

239257
def test_allow_nan
@@ -828,4 +846,24 @@ def test_numbers_of_various_sizes
828846
assert_equal "[#{number}]", JSON.generate([number])
829847
end
830848
end
849+
850+
def test_generate_duplicate_keys_allowed
851+
hash = { foo: 1, "foo" => 2 }
852+
assert_equal %({"foo":1,"foo":2}), JSON.generate(hash, allow_duplicate_key: true)
853+
end
854+
855+
def test_generate_duplicate_keys_deprecated
856+
hash = { foo: 1, "foo" => 2 }
857+
assert_deprecated_warning(/allow_duplicate_key/) do
858+
assert_equal %({"foo":1,"foo":2}), JSON.generate(hash)
859+
end
860+
end
861+
862+
def test_generate_duplicate_keys_disallowed
863+
hash = { foo: 1, "foo" => 2 }
864+
error = assert_raise JSON::GeneratorError do
865+
JSON.generate(hash, allow_duplicate_key: false)
866+
end
867+
assert_equal %(detected duplicate key "foo" in #{hash.inspect}), error.message
868+
end
831869
end

0 commit comments

Comments
 (0)