Skip to content

Commit 06f00a4

Browse files
committed
Deprecate duplicate keys in object
There are few legitimate use cases for duplicate keys, and they can in some cases be exploited. Rather than always silently accepting them, we should emit a warning, and in the future require them to be explicitly allowed.
1 parent eed753f commit 06f00a4

File tree

7 files changed

+225
-88
lines changed

7 files changed

+225
-88
lines changed

CHANGES.md

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2,6 +2,9 @@
22

33
### Unreleased
44

5+
* Add new `allow_duplicate_key` parsing option. By default a warning is now emitted when a duplicate key is encountered.
6+
In `json 3.0` an error will be raised.
7+
58
### 2025-05-23 (2.12.2)
69

710
* Fix compiler optimization level.

ext/json/ext/parser/parser.c

Lines changed: 59 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -35,7 +35,7 @@ static ID i_chr, i_aset, i_aref,
3535
i_leftshift, i_new, i_try_convert, i_uminus, i_encode;
3636

3737
static VALUE sym_max_nesting, sym_allow_nan, sym_allow_trailing_comma, sym_symbolize_names, sym_freeze,
38-
sym_decimal_class, sym_on_load;
38+
sym_decimal_class, sym_on_load, sym_allow_duplicate_key;
3939

4040
static int binary_encindex;
4141
static int utf8_encindex;
@@ -363,10 +363,17 @@ static int convert_UTF32_to_UTF8(char *buf, uint32_t ch)
363363
return len;
364364
}
365365

366+
enum duplicate_key_action {
367+
JSON_DEPRECATED = 0,
368+
JSON_IGNORE,
369+
JSON_RAISE,
370+
};
371+
366372
typedef struct JSON_ParserStruct {
367373
VALUE on_load_proc;
368374
VALUE decimal_class;
369375
ID decimal_method_id;
376+
enum duplicate_key_action on_duplicate_key;
370377
int max_nesting;
371378
bool allow_nan;
372379
bool allow_trailing_comma;
@@ -386,15 +393,8 @@ typedef struct JSON_ParserStateStruct {
386393
int current_nesting;
387394
} JSON_ParserState;
388395

389-
390-
#define PARSE_ERROR_FRAGMENT_LEN 32
391-
#ifdef RBIMPL_ATTR_NORETURN
392-
RBIMPL_ATTR_NORETURN()
393-
#endif
394-
static void raise_parse_error(const char *format, JSON_ParserState *state)
396+
static void cursor_position(JSON_ParserState *state, long *line_out, long *column_out)
395397
{
396-
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
397-
398398
const char *cursor = state->cursor;
399399
long column = 0;
400400
long line = 1;
@@ -411,6 +411,27 @@ static void raise_parse_error(const char *format, JSON_ParserState *state)
411411
line++;
412412
}
413413
}
414+
*line_out = line;
415+
*column_out = column;
416+
}
417+
418+
static void emit_parse_warning(const char *message, JSON_ParserState *state)
419+
{
420+
long line, column;
421+
cursor_position(state, &line, &column);
422+
423+
rb_warn("%s at line %ld column %ld", message, line, column);
424+
}
425+
426+
#define PARSE_ERROR_FRAGMENT_LEN 32
427+
#ifdef RBIMPL_ATTR_NORETURN
428+
RBIMPL_ATTR_NORETURN()
429+
#endif
430+
static void raise_parse_error(const char *format, JSON_ParserState *state)
431+
{
432+
unsigned char buffer[PARSE_ERROR_FRAGMENT_LEN + 3];
433+
long line, column;
434+
cursor_position(state, &line, &column);
414435

415436
const char *ptr = "EOF";
416437
if (state->cursor && state->cursor < state->end) {
@@ -807,11 +828,25 @@ static inline VALUE json_decode_array(JSON_ParserState *state, JSON_ParserConfig
807828
return array;
808829
}
809830

810-
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, long count)
831+
static inline VALUE json_decode_object(JSON_ParserState *state, JSON_ParserConfig *config, size_t count)
811832
{
812-
VALUE object = rb_hash_new_capa(count);
833+
size_t entries_count = count / 2;
834+
VALUE object = rb_hash_new_capa(entries_count);
813835
rb_hash_bulk_insert(count, rvalue_stack_peek(state->stack, count), object);
814836

837+
if (RB_UNLIKELY(RHASH_SIZE(object) < entries_count)) {
838+
switch (config->on_duplicate_key) {
839+
case JSON_IGNORE:
840+
break;
841+
case JSON_DEPRECATED:
842+
emit_parse_warning("detected duplicate keys in JSON object. This will raise an error in json 3.0 unless enabled via `allow_duplicate_key: true`", state);
843+
break;
844+
case JSON_RAISE:
845+
raise_parse_error("duplicate key", state);
846+
break;
847+
}
848+
}
849+
815850
rvalue_stack_pop(state->stack, count);
816851

817852
if (config->freeze) {
@@ -1060,6 +1095,8 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10601095
break;
10611096
}
10621097
case '{': {
1098+
const char *object_start_cursor = state->cursor;
1099+
10631100
state->cursor++;
10641101
json_eat_whitespace(state);
10651102
long stack_head = state->stack->head;
@@ -1094,8 +1131,15 @@ static VALUE json_parse_any(JSON_ParserState *state, JSON_ParserConfig *config)
10941131
if (*state->cursor == '}') {
10951132
state->cursor++;
10961133
state->current_nesting--;
1097-
long count = state->stack->head - stack_head;
1098-
return json_push_value(state, config, json_decode_object(state, config, count));
1134+
size_t count = state->stack->head - stack_head;
1135+
1136+
// Temporary rewind cursor in case an error is raised
1137+
const char *final_cursor = state->cursor;
1138+
state->cursor = object_start_cursor;
1139+
VALUE object = json_decode_object(state, config, count);
1140+
state->cursor = final_cursor;
1141+
1142+
return json_push_value(state, config, object);
10991143
}
11001144

11011145
if (*state->cursor == ',') {
@@ -1184,6 +1228,7 @@ static int parser_config_init_i(VALUE key, VALUE val, VALUE data)
11841228
else if (key == sym_symbolize_names) { config->symbolize_names = RTEST(val); }
11851229
else if (key == sym_freeze) { config->freeze = RTEST(val); }
11861230
else if (key == sym_on_load) { config->on_load_proc = RTEST(val) ? val : Qfalse; }
1231+
else if (key == sym_allow_duplicate_key) { config->on_duplicate_key = RTEST(val) ? JSON_IGNORE : JSON_RAISE; }
11871232
else if (key == sym_decimal_class) {
11881233
if (RTEST(val)) {
11891234
if (rb_respond_to(val, i_try_convert)) {
@@ -1400,6 +1445,7 @@ void Init_parser(void)
14001445
sym_freeze = ID2SYM(rb_intern("freeze"));
14011446
sym_on_load = ID2SYM(rb_intern("on_load"));
14021447
sym_decimal_class = ID2SYM(rb_intern("decimal_class"));
1448+
sym_allow_duplicate_key = ID2SYM(rb_intern("allow_duplicate_key"));
14031449

14041450
i_chr = rb_intern("chr");
14051451
i_aset = rb_intern("[]=");

java/src/json/ext/OptionsReader.java

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,13 @@ IRubyObject get(String key) {
5353
return opts == null ? null : opts.fastARef(runtime.newSymbol(key));
5454
}
5555

56+
boolean hasKey(String key) {
57+
if (opts == null) {
58+
return false;
59+
}
60+
return opts.hasKey(runtime.newSymbol(key));
61+
}
62+
5663
boolean getBool(String key, boolean defaultValue) {
5764
IRubyObject value = get(key);
5865
return value == null ? defaultValue : value.isTrue();

0 commit comments

Comments
 (0)