
Commit b4f1f9b

fix JSONEntry Eq and add documentation
1 parent b494199 commit b4f1f9b

7 files changed: +92 -41 lines changed

README.md

Lines changed: 17 additions & 0 deletions
```diff
@@ -122,6 +122,23 @@ e.g. to take the existing 1kb JSON paramters, but also support 124-byte keys, us
 
 If you are deriving a key to look up in-circuit and you do not know the maximum length of the key, all query methods have a version with a `_var` suffix (e.g. `JSON::get_string_var`), which accepts the key as a `BoundedVec`
 
+# Architecture
+### Overview
+The JSON parser uses 5 steps to efficiently parse and index JSON data:
+
+1. **build_transcript** - Convert raw bytes to token transcript
+2. **capture_missing_tokens & keyswap** - Fix missing tokens and correctly identify keys
+3. **compute_json_packed** - Pack bytes into Field elements for efficient substring extraction
+4. **create_json_entries** - Create structured JSON entries with parent-child relationships
+5. **compute_keyhash_and_sort_json_entries** - Sort entries by key hash for efficient lookups
+
+### Table Generation
+The parser uses several lookup tables generated from `src/_table_generation/`:
+- `TOKEN_FLAGS_TABLE`: State transitions for token processing
+- `JSON_CAPTURE_TABLE`: Character-by-character parsing rules
+- `TOKEN_VALIDATION_TABLE`: JSON grammar validation
+
+
 # Acknowledgements
 
 Many thanks to the authors of the OG noir json library https://github.com/RontoSOFT/noir-json-parser
```
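Step 3 above (`compute_json_packed`) relies on the usual trick of packing many bytes into a single field element. A minimal sketch of the idea, independent of the library (the function name and the choice of 31 bytes per Field are placeholders of mine, not the crate's API):

```
// Illustration only: up to 31 bytes fit into one BN254 Field, so a byte string can be
// represented by far fewer Field elements, and substrings can be compared with field
// arithmetic rather than byte-by-byte loops.
fn example_pack_31_bytes(bytes: [u8; 31]) -> Field {
    let mut packed: Field = 0;
    for i in 0..31 {
        // big-endian: earlier bytes land in the more significant positions
        packed = packed * 256 + bytes[i] as Field;
    }
    packed
}

#[test]
fn example_pack_31_bytes_zero() {
    // all-zero input packs to the zero Field element
    assert(example_pack_31_bytes([0; 31]) == 0);
}
```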

src/_table_generation/make_tables.nr

Lines changed: 9 additions & 9 deletions
```diff
@@ -384,7 +384,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] {
     let mut flags: [TokenFlags; NUM_TOKENS * 2] = [TokenFlags::default(); NUM_TOKENS * 2];
 
     let mut no_token_flags: TokenFlags = TokenFlags {
-        create_json_entry: 0,
+        create_json_entry: false,
         is_end_of_object_or_array: 0,
         is_start_of_object_or_array: 0,
         new_context: OBJECT_LAYER as Field,
@@ -393,7 +393,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] {
         preserve_num_entries: 1,
     };
     let mut key_token_flags: TokenFlags = TokenFlags {
-        create_json_entry: 0,
+        create_json_entry: false,
         is_end_of_object_or_array: 0,
         is_start_of_object_or_array: 0,
         new_context: OBJECT_LAYER as Field,
@@ -402,7 +402,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] {
         preserve_num_entries: 1,
     };
     let begin_object_flags = TokenFlags {
-        create_json_entry: 0,
+        create_json_entry: false,
         is_end_of_object_or_array: 0,
         is_start_of_object_or_array: 1,
         new_context: OBJECT_LAYER as Field,
@@ -412,7 +412,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] {
     };
 
     let begin_array_flags = TokenFlags {
-        create_json_entry: 0,
+        create_json_entry: false,
         is_end_of_object_or_array: 0,
         is_start_of_object_or_array: 1,
         new_context: ARRAY_LAYER as Field,
@@ -422,7 +422,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] {
     };
 
     let mut end_object_flags = TokenFlags {
-        create_json_entry: 1,
+        create_json_entry: true,
         is_end_of_object_or_array: 1,
         is_start_of_object_or_array: 0,
         new_context: 0,
@@ -432,7 +432,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] {
     };
 
     let mut end_array_flags = TokenFlags {
-        create_json_entry: 1,
+        create_json_entry: true,
         is_end_of_object_or_array: 1,
         is_start_of_object_or_array: 0,
         new_context: 0,
@@ -442,7 +442,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] {
     };
 
     let mut string_flags = TokenFlags {
-        create_json_entry: 1,
+        create_json_entry: true,
         is_end_of_object_or_array: 0,
         is_start_of_object_or_array: 0,
         new_context: OBJECT_LAYER as Field,
@@ -452,7 +452,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] {
     };
 
     let mut numeric_flags = TokenFlags {
-        create_json_entry: 1,
+        create_json_entry: true,
         is_end_of_object_or_array: 0,
         is_start_of_object_or_array: 0,
         new_context: OBJECT_LAYER as Field,
@@ -462,7 +462,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] {
     };
 
     let mut literal_flags = TokenFlags {
-        create_json_entry: 1,
+        create_json_entry: true,
         is_end_of_object_or_array: 0,
         is_start_of_object_or_array: 0,
         new_context: OBJECT_LAYER as Field,
```
Lines changed: 24 additions & 0 deletions
```diff
@@ -0,0 +1,24 @@
+# Table Generation Documentation
+
+## Overview
+The JSON parser uses lookup tables to avoid branching logic and reduce gate count. These tables are generated from `src/_table_generation/make_tables.nr`.
+
+## Generation Process
+Tables are generated by simulating all possible input combinations from basic hardcoded tables and recording the expected outputs.
+
+## TOKEN_FLAGS_TABLE
+Maps (token, context) pairs to parsing flags:
+- `create_json_entry`: Whether to create a JSON entry for this token
+- `is_end_of_object_or_array`: Whether token ends an object/array
+- `is_start_of_object_or_array`: Whether token starts an object/array
+- `new_context`: What context to switch to
+- `is_key_token`: Whether token is a key
+- `is_value_token`: Whether token is a value
+- `preserve_num_entries`: Whether to preserve entry count
+
+## JSON_CAPTURE_TABLE
+Maps (escape_flag, scan_mode, ascii) to scanning actions:
+- `scan_token`: Next scan mode
+- `push_transcript`: Whether to add token to transcript
+- `increase_length`: Whether to extend current token
+- `is_potential_escape_sequence`: Whether this could be escape sequence
```
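Because `TOKEN_FLAGS_TABLE` stores one block of `NUM_TOKENS` rows per context (object rows first, then array rows), a (token, context) pair selects a row as `token + context * NUM_TOKENS`, which is exactly the lookup performed when building JSON entries (see the `src/json.nr` diff below). A small standalone sketch with made-up numbers (the value 8 stands in for `NUM_TOKENS`; it is not the real constant):

```
#[test]
fn example_token_flags_row_index() {
    let num_tokens: u32 = 8; // placeholder; the real table uses NUM_TOKENS
    let token: u32 = 3;      // placeholder token id
    let object_context: u32 = 0;
    let array_context: u32 = 1;
    // the same token selects a different row of flags depending on the active context
    assert(token + object_context * num_tokens == 3);
    assert(token + array_context * num_tokens == 11);
}
```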

src/json.nr

Lines changed: 24 additions & 11 deletions
```diff
@@ -77,8 +77,7 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
         }
     }
 
-    unconstrained fn __check_entry_ptr_bounds(entry_ptr: Field, max: u32) {
-        entry_ptr.assert_max_bit_size::<32>();
+    unconstrained fn __check_entry_ptr_bounds(entry_ptr: u32, max: u32) {
         // n.b. even though this assert is in an unconstrained function, an out of bounds error will be triggered when writing into self.key_data[entry_ptr]
         assert(entry_ptr as u32 < max - 1, "create_json_entries: MaxNumValues limit exceeded!");
     }
@@ -244,15 +243,18 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
     * 7. Is the token one that we should skip over? `,` or `:`
     **/
     fn create_json_entries(&mut self) {
-        let mut entry_ptr = 0;
+        let mut entry_ptr: u32 = 0;
        let mut depth: Field = 1;
        let mut num_entries_at_current_depth: Field = 0;
        let mut next_identity_value: Field = 1;
        let mut current_identity_value: Field = 0;
+        // context: 0 for object, 1 for array
        let mut context = OBJECT_LAYER;
 
+        // lower 2 bytes is the index, upper 2 bytes is the length
        let mut current_key_index_and_length: Field = 0;
 
+        // stack won't pop elements, but will push new elements by overwriting the top element
        let mut parent_context_stack: [Field; 32] = [0; 32];
        let mut tokens: [Field; MaxNumTokens] = [0; MaxNumTokens];
        // maybe 71.75 gates per iteration
@@ -270,16 +272,17 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
                is_end_of_object_or_array,
                is_start_of_object_or_array,
                new_context,
-                is_key_token: update_key,
+                is_key_token,
                is_value_token,
                preserve_num_entries,} = TokenFlags::from_field(
                TOKEN_FLAGS_TABLE[cast_num_to_u32(token) + context * NUM_TOKENS],
            );
 
            // 2 gates
+            // only update current_key_index_and_length if the token is a key token
            let diff = (index + length * 0x10000) - current_key_index_and_length;
            std::as_witness(diff);
-            current_key_index_and_length = diff * update_key + current_key_index_and_length;
+            current_key_index_and_length = diff * is_key_token + current_key_index_and_length;
            std::as_witness(current_key_index_and_length);
 
            // 2 gates
@@ -294,12 +297,13 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
            );
            // subtotal 22.25
            // 1 gate
+            // set new_entry to object_or_array_entry if the token is the end of an object or array, or set new_entry to value_entry
            let depth_index: Field = (depth - 1);
            // 3.5 gates
            let previous_stack_entry_packed = parent_context_stack[cast_num_to_u32(depth_index)];
 
            // 9.5 gates
-            let previous_stack_entry =
+            let previous_stack_entry: JSONContextStackEntry =
                JSONContextStackEntry::from_field(previous_stack_entry_packed);
 
            let object_or_array_entry: JSONEntry = JSONEntry {
@@ -337,6 +341,10 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
 
            // 3 gates
            // subtotal 24 + 22.25 = 46.25
+            // if preserve_num_entries (i.e. not start or end of object or array) is 1, then current_identity_value does not change.
+            // if is_start_of_object_or_array is 1, then current_identity_value is set to next_identity_value.
+            // if is_end_of_object_or_array is 1, then current_identity_value is set to previous_stack_entry.current_identity.
+
            let old = current_identity_value;
            current_identity_value = (next_identity_value * is_start_of_object_or_array);
            std::as_witness(current_identity_value);
@@ -347,6 +355,8 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
            std::as_witness(current_identity_value);
 
            // 2 gates
+            // If we see a value token (string/number/literal), we add 1 to count. If we see , or :, no change.
+            // If preserve_num_entries is 0 (i.e. start or end of object or array) then we reset variable to 0.
            num_entries_at_current_depth =
                num_entries_at_current_depth * preserve_num_entries + is_value_token;
            std::as_witness(num_entries_at_current_depth);
@@ -356,11 +366,15 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
 
            // 1 gate
            // if `is_end_of_object_or_array == 1`, `new_context = 0` so we can do something cheaper than a conditional select:
+            // If is_end_of_object_or_array is 1, then new_context is 0, so set context = previous_stack_entry.context
+            // If is_end_of_object_or_array is 0, then set context = new_context
            context = cast_num_to_u32(
                previous_stack_entry.context * is_end_of_object_or_array + new_context,
            );
            std::as_witness(context as Field);
            // 3 gates
+            // If context is 0 (object context), then don't take the num_entries_at_current_depth term into account
+            // because searching for a key only depends on the key name, not position, as opposed to array context where we need to look up by position/index.
            let common_term = current_identity_value
                + context as Field * (num_entries_at_current_depth - 1) * 0x1000000000000;
            std::as_witness(common_term);
@@ -374,14 +388,14 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
            std::as_witness(new_key_data);
 
            // 3.5 gates
-            self.key_data[cast_num_to_u32(entry_ptr)] = new_key_data * create_json_entry;
+            self.key_data[entry_ptr] = new_key_data * create_json_entry as Field;
 
            // 3.5 gates
            parent_context_stack[cast_num_to_u32(depth)] = new_context_stack_entry;
 
            // 4.5 gates
-            self.json_entries_packed[cast_num_to_u32(entry_ptr)] =
-                JSONEntryPacked { value: new_entry * create_json_entry };
+            self.json_entries_packed[entry_ptr] =
+                JSONEntryPacked { value: new_entry * create_json_entry as Field };
 
            // 1 gate
            next_identity_value = next_identity_value + is_start_of_object_or_array;
@@ -393,8 +407,7 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
            // 1 gate
            // 2105 + 46.25
            // subtotal 66.75?
-            entry_ptr += create_json_entry;
-            std::as_witness(entry_ptr);
+            entry_ptr += create_json_entry as u32;
        }
        self.validate_tokens(tokens);
    }
```
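The new comment on `current_key_index_and_length` describes a simple packed layout: the key's byte index sits in the low 16 bits and its length in the next 16 bits, which is why the loop updates it with `index + length * 0x10000`. A standalone sketch of that packing (the test name and concrete values are my own, not from the library):

```
#[test]
fn example_key_index_and_length_packing() {
    let index: Field = 7;   // placeholder byte offset of a key in the JSON
    let length: Field = 3;  // placeholder key length in bytes
    // same packing as the loop: low 16 bits = index, next 16 bits = length
    let packed = index + length * 0x10000;
    let bytes: [u8; 4] = packed.to_be_bytes();
    // big-endian: bytes 0-1 hold the length, bytes 2-3 hold the index
    assert(bytes[0] as Field * 0x100 + bytes[1] as Field == length);
    assert(bytes[2] as Field * 0x100 + bytes[3] as Field == index);
}
```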

src/json_entry.nr

Lines changed: 1 addition & 1 deletion
```diff
@@ -235,7 +235,7 @@ impl std::cmp::Eq for JSONEntry {
         let num_children = (self.num_children == other.num_children);
         let json_pointer = (self.json_pointer == other.json_pointer);
         let json_length = (self.json_length == other.json_length);
-        array_ptr | entry | child | num_children | json_pointer | json_length
+        array_ptr & entry & child & num_children & json_pointer & json_length
     }
 }
 
```
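This one-line change is the bug named in the commit title: `Eq` was OR-ing the per-field comparisons, so two entries that agreed on any single field compared as equal; AND-ing them requires every field to match. A trivial standalone illustration of the two combinators (not code from the repository):

```
#[test]
fn example_eq_requires_all_fields() {
    // two of three per-field comparisons match, one does not
    let field_a_matches = true;
    let field_b_matches = true;
    let field_c_matches = false;
    // old behaviour: OR reports "equal" even though one field differs
    assert((field_a_matches | field_b_matches | field_c_matches) == true);
    // fixed behaviour: AND only reports "equal" when every field matches
    assert((field_a_matches & field_b_matches & field_c_matches) == false);
}
```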
src/keymap.nr

Lines changed: 9 additions & 15 deletions
```diff
@@ -62,6 +62,7 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
             let KeyIndexData { json_index, json_length, parent_id, array_index } =
                 KeyIndexData::from_field(self.key_data[i]);
             let hash = hasher.get_keyhash(self.json_packed, json_index, json_length);
+            //ensures hash:0-199 bits, array_index:200-215 bits, parent_id: 216-239 bits
             hashlist[i] = hash + array_index * two_pow_200 + parent_id * two_pow_216;
         }
 
@@ -72,49 +73,41 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
 
         let mut sorted_entries: [JSONEntryPacked; MaxNumValues] =
             [JSONEntryPacked::default(); MaxNumValues];
+
         for i in 0..MaxNumValues {
             sorted_entries[sort_result.sort_indices[i]] = self.json_entries_packed[i];
         }
 
-        let mut ids: [Field; MaxNumValues] = [0; MaxNumValues];
         let mut parent_indices: [Field; MaxNumValues] = [0; MaxNumValues];
-        let mut entry_types: [Field; MaxNumValues] = [0; MaxNumValues];
 
+        let mut identity_to_json_map: [Field; MaxNumValues] = [0; MaxNumValues];
         for i in 0..MaxNumValues {
             // 11.75 + 3.5 = 15.25 gates per iteration
             let (id, parent_index, entry_type) = JSONEntry::extract_entry_type_id_and_parent_index_from_field(
                 sorted_entries[i].value,
             );
-            ids[i] = id;
             parent_indices[i] = parent_index;
-            entry_types[i] = entry_type;
-        }
-
-        let mut identity_to_json_map: [Field; MaxNumValues] = [0; MaxNumValues];
-        // 6.5 gates per iteration
-        for i in 0..MaxNumValues {
-            let id = ids[i];
-            let entry_type = entry_types[i];
             // 2 gates
+            // update is 1 for end of object/array, 0 for other
             let update = TOKEN_ENDS_OBJECT_OR_ARRAY[cast_num_to_u32(entry_type)];
             // NOTE THIS RELIES ON MaxNumValues ACTUALLY DESCRIBING NUMMaxNumValues + 1
+            // index = id if update = 1, else MaxNumValues -1
             // 1 gate
             let index = (id - (MaxNumValues as Field - 1)) * update + (MaxNumValues as Field - 1);
             // 3.5 gates
             identity_to_json_map[cast_num_to_u32(index)] = i as Field;
         }
-
         // 13.5 gates per iteration
         let mut parent_identity_pre = parent_indices[0];
         for i in 1..MaxNumValues {
             let parent_identity_post = parent_indices[i];
             // if the parent identity changes,
             // 3.5 gate
-            // the list is sorted according to parent_ideneity,
+            // the list is sorted according to parent_identity,
             // n.b. parent_identity_post - parent_identity_pre is not neccessarily 0 or 1 (can be larger)
             // due to empty objects and arrays increasing identity value without creating associated child json entries
-            let new_parent = lt_field_16_bit(parent_identity_pre, parent_identity_post) as Field;
             // let new_parent = (parent_identity_post as u32 > parent_identity_pre as u32) as Field;
+            let new_parent = lt_field_16_bit(parent_identity_pre, parent_identity_post) as Field;
             // 3.5 gates
             let index_of_parent = identity_to_json_map[cast_num_to_u32(parent_identity_post)];
             // 1 gate + 3.5 gates
@@ -127,11 +120,12 @@ impl<let NumBytes: u32, let NumPackedFields: u32, let MaxNumTokens: u32, let Max
             // 1 gate
             let index = (index_of_parent * new_parent);
             // 3.5 gates
+            //index is just 0 if new_parent is false, so sorted_entries[0] is useless info
             sorted_entries[cast_num_to_u32(index)] = JSONEntryPacked { value: updated };
 
             parent_identity_pre = parent_identity_post;
         }
-        sorted_entries[0] = JSONEntryPacked::default(); // TODO document why we want to always make 0 a dead entry
+        sorted_entries[0] = JSONEntryPacked::default();
         self.unsorted_json_entries_packed = self.json_entries_packed;
         self.json_entries_packed = sorted_entries;
         self.key_hashes = sort_result.sorted;
```
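The merged loop above selects a slot in `identity_to_json_map` without branching, matching the new comment `index = id if update = 1, else MaxNumValues - 1`: when `update` is 1 the entry's own id is used, otherwise the write is diverted to the dead slot `MaxNumValues - 1`. A standalone sketch of that select (the value 8 is a placeholder for `MaxNumValues`, not the real parameter):

```
#[test]
fn example_identity_map_index_select() {
    let max_num_values: Field = 8; // placeholder for MaxNumValues
    let id: Field = 3;
    // update = 1: the token ends an object/array, so index = id
    assert((id - (max_num_values - 1)) * 1 + (max_num_values - 1) == 3);
    // update = 0: index falls back to the dead slot MaxNumValues - 1
    assert((id - (max_num_values - 1)) * 0 + (max_num_values - 1) == 7);
}
```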

src/token_flags.nr

Lines changed: 8 additions & 5 deletions
```diff
@@ -1,18 +1,22 @@
 pub(crate) struct TokenFlags {
-    pub(crate) create_json_entry: Field,
+    // set to true if token is literal/number/string(not key)/end of array/object
+    pub(crate) create_json_entry: bool,
     pub(crate) is_end_of_object_or_array: Field,
     pub(crate) is_start_of_object_or_array: Field,
     pub(crate) new_context: Field,
     pub(crate) is_key_token: Field,
     pub(crate) is_value_token: Field,
+    // boolean flag that controls whether the current token should preserve the existing count of entries at the current depth or reset/increment it.
+    // 1 for tokens like NO_TOKEN, KEY_TOKEN, STRING_TOKEN, NUMERIC_TOKEN, LITERAL_TOKEN
+    // 0 for tokens like OBJECT_START_TOKEN, ARRAY_START_TOKEN, OBJECT_END_TOKEN, ARRAY_END_TOKEN
     pub(crate) preserve_num_entries: Field,
 }
 
 impl TokenFlags {
 
     unconstrained fn __from_field(f: Field) -> Self {
         let bytes: [u8; 7] = f.to_be_bytes();
-        let create_json_entry = bytes[0] as Field;
+        let create_json_entry = bytes[0] != 0;
         let is_end_of_object_or_array = bytes[1] as Field;
         let is_start_of_object_or_array = bytes[2] as Field;
         let new_context = bytes[3] as Field;
@@ -37,7 +41,6 @@ impl TokenFlags {
         let r = unsafe { TokenFlags::__from_field(f) };
 
         // checks that the flags are binary
-        assert(r.create_json_entry * r.create_json_entry == r.create_json_entry);
         assert(
             r.is_end_of_object_or_array * r.is_end_of_object_or_array
                 == r.is_end_of_object_or_array,
@@ -64,12 +67,12 @@ impl TokenFlags {
             + self.new_context * 0x1000000
             + self.is_start_of_object_or_array * 0x100000000
             + self.is_end_of_object_or_array * 0x10000000000
-            + self.create_json_entry * 0x1000000000000
+            + self.create_json_entry as Field * 0x1000000000000
     }
 
     pub(crate) fn default() -> Self {
         TokenFlags {
-            create_json_entry: 0,
+            create_json_entry: false,
             is_end_of_object_or_array: 0,
             is_start_of_object_or_array: 0,
             new_context: 0,
```
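As `__from_field` and `to_field` show, a `TokenFlags` value lives in the 7 big-endian bytes of a Field: byte 0 is `create_json_entry` (multiplier 0x1000000000000), byte 1 `is_end_of_object_or_array`, byte 2 `is_start_of_object_or_array`, byte 3 `new_context`. The remaining three bytes presumably hold `is_key_token`, `is_value_token` and `preserve_num_entries`; that ordering is my inference from the visible multipliers, not something shown in this diff. A round-trip sketch of the assumed layout:

```
#[test]
fn example_token_flags_byte_layout() {
    // hypothetical flag values, packed with the multipliers used in to_field
    let packed: Field = 1                   // preserve_num_entries        (byte 6)
        + 1 * 0x100                         // is_value_token              (byte 5, assumed position)
        + 0 * 0x10000                       // is_key_token                (byte 4, assumed position)
        + 0 * 0x1000000                     // new_context                 (byte 3)
        + 0 * 0x100000000                   // is_start_of_object_or_array (byte 2)
        + 0 * 0x10000000000                 // is_end_of_object_or_array   (byte 1)
        + 1 * 0x1000000000000;              // create_json_entry           (byte 0)
    // the same big-endian decomposition __from_field performs
    let bytes: [u8; 7] = packed.to_be_bytes();
    assert(bytes[0] == 1); // create_json_entry
    assert(bytes[5] == 1); // is_value_token (assumed)
    assert(bytes[6] == 1); // preserve_num_entries
}
```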
