From 246e245695ba96c19e45089951a0090d8f12f499 Mon Sep 17 00:00:00 2001 From: jialinli Date: Thu, 31 Jul 2025 20:46:04 -0700 Subject: [PATCH 01/13] fix JSONEntry Eq and add documentation --- README.md | 21 ++++++++++++++ src/_table_generation/make_tables.nr | 18 ++++++------ src/_table_generation/table_generation.md | 25 ++++++++++++++++ src/json.nr | 35 ++++++++++++++++------- src/json_entry.nr | 2 +- src/keymap.nr | 24 ++++++---------- src/token_flags.nr | 9 +++--- 7 files changed, 93 insertions(+), 41 deletions(-) create mode 100644 src/_table_generation/table_generation.md diff --git a/README.md b/README.md index d9ff043..dcacbb7 100644 --- a/README.md +++ b/README.md @@ -122,6 +122,27 @@ e.g. to take the existing 1kb JSON parameters, but also support 124-byte keys, us If you are deriving a key to look up in-circuit and you do not know the maximum length of the key, all query methods have a version with a `_var` suffix (e.g. `JSON::get_string_var`), which accepts the key as a `BoundedVec`
+# Architecture
+### Overview
+The JSON parser uses 5 steps to efficiently parse and index JSON data:
+
+1. **build_transcript** - Convert raw bytes to a transcript of tokens using a state machine defined by JSON_CAPTURE_TABLE. Each character is categorized as string, number, etc.
+2. **capture_missing_tokens & keyswap** - Fix missing tokens and correctly identify keys. Complete a second scan of the tokens, check for missing tokens (e.g. commas after literals), and relabel strings that are keys of an object as key tokens.
+3. **compute_json_packed** - Pack bytes into Field elements for efficient substring extraction
+4. **create_json_entries** - Create structured JSON entries with parent-child relationships
+5. **compute_keyhash_and_sort_json_entries** - Sort entries by key hash for efficient lookups
+
+### Key Design Patterns
+- **Using table lookups**: Uses many lookup tables to avoid branching logic and reduce circuit size
+- **Packing data to Field elements**: Combines multiple fields that encode different features into a single Field element for comparison (see the sketch below)
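+
+A minimal sketch of this packing pattern (simplified from `TokenFlags::to_field`; the field names and byte offsets here are illustrative, not the library's exact layout):
+
+```rust
+// Each flag occupies its own byte inside a single Field element, so two
+// flag sets can be compared with one equality check instead of several.
+struct Flags {
+    is_value: Field, // 0 or 1
+    is_key: Field,   // 0 or 1
+    context: Field,  // 0 = object, 1 = array
+}
+
+impl Flags {
+    fn to_field(self) -> Field {
+        // byte 0: is_value, byte 1: is_key, byte 2: context
+        self.is_value + self.is_key * 0x100 + self.context * 0x10000
+    }
+}
+
+#[test]
+fn test_flags_pack() {
+    let a = Flags { is_value: 1, is_key: 0, context: 1 };
+    let b = Flags { is_value: 1, is_key: 0, context: 1 };
+    // one comparison covers all three flags
+    assert(a.to_field() == b.to_field());
+}
+```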
+
+### Table Generation
+The parser uses several lookup tables generated from `src/_table_generation/`:
+- `TOKEN_FLAGS_TABLE`: State transitions for token processing
+- `JSON_CAPTURE_TABLE`: Character-by-character parsing rules
+- `TOKEN_VALIDATION_TABLE`: JSON grammar validation
+
+
 # Acknowledgements Many thanks to the authors of the OG noir json library https://github.com/RontoSOFT/noir-json-parser diff --git a/src/_table_generation/make_tables.nr b/src/_table_generation/make_tables.nr index 64e5921..a2a9086 100644 --- a/src/_table_generation/make_tables.nr +++ b/src/_table_generation/make_tables.nr @@ -384,7 +384,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] { let mut flags: [TokenFlags; NUM_TOKENS * 2] = [TokenFlags::default(); NUM_TOKENS * 2]; let mut no_token_flags: TokenFlags = TokenFlags { - create_json_entry: 0, + create_json_entry: false, is_end_of_object_or_array: 0, is_start_of_object_or_array: 0, new_context: OBJECT_LAYER as Field, @@ -393,7 +393,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] { preserve_num_entries: 1, }; let mut key_token_flags: TokenFlags = TokenFlags { - create_json_entry: 0, + create_json_entry: false, is_end_of_object_or_array: 0, is_start_of_object_or_array: 0, new_context: OBJECT_LAYER as Field, @@ -402,7 +402,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] { preserve_num_entries: 1, }; let begin_object_flags = TokenFlags { - create_json_entry: 0, + create_json_entry: false, is_end_of_object_or_array: 0, is_start_of_object_or_array: 1, new_context: OBJECT_LAYER as Field, @@ -412,7 +412,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] { }; let begin_array_flags = TokenFlags { - create_json_entry: 0, + create_json_entry: false, is_end_of_object_or_array: 0, is_start_of_object_or_array: 1, new_context: ARRAY_LAYER as Field, @@ -422,7 +422,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] { }; let mut end_object_flags = TokenFlags { - create_json_entry: 1, + create_json_entry: true, is_end_of_object_or_array: 1, is_start_of_object_or_array: 0, new_context: 0, @@ -432,7 +432,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] { }; let mut end_array_flags = TokenFlags { - create_json_entry: 1, + create_json_entry: true, is_end_of_object_or_array: 1, is_start_of_object_or_array: 0, new_context: 0, @@ -442,7 +442,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] { }; let mut string_flags = TokenFlags { - create_json_entry: 1, + create_json_entry: true, is_end_of_object_or_array: 0, is_start_of_object_or_array: 0, new_context: OBJECT_LAYER as Field, @@ -452,7 +452,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] { }; let mut numeric_flags = TokenFlags { - create_json_entry: 1, + create_json_entry: true, is_end_of_object_or_array: 0, is_start_of_object_or_array: 0, new_context: OBJECT_LAYER as Field, @@ -462,7 +462,7 @@ unconstrained fn generate_token_flags_table() -> [Field; NUM_TOKENS * 2] { }; let mut literal_flags = TokenFlags { - create_json_entry: 1, + create_json_entry: true, is_end_of_object_or_array: 0, is_start_of_object_or_array: 0, new_context: OBJECT_LAYER as Field, diff --git a/src/_table_generation/table_generation.md b/src/_table_generation/table_generation.md new file mode 100644 index 0000000..2bd87fb --- /dev/null +++ b/src/_table_generation/table_generation.md @@ -0,0 +1,25 @@
+# Table Generation Documentation
+
+## Overview
+The JSON parser uses lookup tables to avoid branching logic and reduce gate count. These tables are generated from `src/_table_generation/make_tables.nr`.
+
+## Generation Process
+Tables are generated by simulating all possible input combinations from basic hardcoded tables and recording the expected outputs.
+
+## TOKEN_FLAGS_TABLE
+Maps (token, context) pairs to parsing flags:
+- `create_json_entry`: Whether to create a JSON entry for this token; set to true if the token is a literal/number/string (not a key) or the end of an array/object
+- `is_end_of_object_or_array`: Whether the token ends an object/array
+- `is_start_of_object_or_array`: Whether the token starts an object/array
+- `new_context`: What context to switch to (object is 0, array is 1)
+- `is_key_token`: Whether the token is a key
+- `is_value_token`: Whether the token is a value; set to true for string_token, numeric_token, and literal_token
+- `preserve_num_entries`: Boolean flag that controls whether the current token should preserve the existing count of entries at the current depth or reset/increment it. Set to 1 for tokens like NO_TOKEN, KEY_TOKEN, STRING_TOKEN, NUMERIC_TOKEN, LITERAL_TOKEN, and 0 for tokens like OBJECT_START_TOKEN, ARRAY_START_TOKEN, OBJECT_END_TOKEN, ARRAY_END_TOKEN.
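+
+To illustrate how a packed flags entry might be consumed, here is a hypothetical lookup sketch. The real table is produced by `generate_token_flags_table()` in make_tables.nr; the indexing scheme shown here (one row per token, repeated per context) is an assumption for illustration only:
+
+```rust
+global NUM_TOKENS: u32 = 8; // assumed token count, for illustration
+
+// Select the packed flag Field for a (token, context) pair. In the library
+// the packed Field is then unpacked by TokenFlags::from_field, which also
+// constrains every extracted flag to be binary.
+fn lookup_token_flags(table: [Field; NUM_TOKENS * 2], token: u32, context: u32) -> Field {
+    table[context * NUM_TOKENS + token]
+}
+```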
+
+## JSON_CAPTURE_TABLE
+Maps (escape_flag, scan_mode, ascii) to scanning actions:
+- `scan_token`: The next capture mode, given the current capture mode; one of grammar_capture ([, {, comma, }, ], :), string_capture, literal_capture, numeric_capture, or error_capture. For example, if we are in string capture and the character is ", scan_token is set to grammar_capture because the string has ended and we return to grammar scanning. If we are in numeric capture and the current character is not 0-9, we likewise return to grammar scanning because the number must have ended.
+- `push_transcript`: Whether to add the token to the transcript. In grammar mode, true for all structural elements [, {, comma, }, ], :. In string_capture, true for ", which signals the end of the string. In numeric/literal_capture, true for space, \t, \n, \r, ", and comma. Note that the first scan does not fully delimit numerics or literals because we don't know when they end, so we rely on the capture_missing_tokens function.
+- `increase_length`: Whether to extend the current token: always false in grammar_capture; true for 0-9 in numeric_capture, for all characters except " in string_capture, and for the letters of true, false, null in literal_capture
+- `is_potential_escape_sequence`: true if the current character is \ (backslash) in string_capture mode
diff --git a/src/json.nr b/src/json.nr index 660598d..7c497a8 100644 --- a/src/json.nr +++ b/src/json.nr @@ -77,8 +77,7 @@ impl(); +unconstrained fn __check_entry_ptr_bounds(entry_ptr: u32, max: u32) { // n.b. even though this assert is in an unconstrained function, an out of bounds error will be triggered when writing into self.key_data[entry_ptr] assert(entry_ptr as u32 < max - 1, "create_json_entries: MaxNumValues limit exceeded!"); } @@ -244,15 +243,18 @@ impl parent_identity_pre as u32) as Field; + let new_parent = lt_field_16_bit(parent_identity_pre, parent_identity_post) as Field; // 3.5 gates let index_of_parent = identity_to_json_map[cast_num_to_u32(parent_identity_post)]; // 1 gate + 3.5 gates @@ -127,11 +120,12 @@ impl Self { let bytes: [u8; 7] = f.to_be_bytes(); - let create_json_entry = bytes[0] as Field; + let create_json_entry = bytes[0] != 0; let is_end_of_object_or_array = bytes[1] as Field; let is_start_of_object_or_array = bytes[2] as Field; let new_context = bytes[3] as Field; @@ -37,7 +37,6 @@ impl TokenFlags { let r = unsafe { TokenFlags::__from_field(f) }; // checks that the flags are binary - assert(r.create_json_entry * r.create_json_entry == r.create_json_entry); assert( r.is_end_of_object_or_array * r.is_end_of_object_or_array == r.is_end_of_object_or_array, @@ -64,12 +63,12 @@ impl TokenFlags { + self.new_context * 0x1000000 + self.is_start_of_object_or_array * 0x100000000 + self.is_end_of_object_or_array * 0x10000000000 - + self.create_json_entry * 0x1000000000000 + + self.create_json_entry as Field * 0x1000000000000 } pub(crate) fn default() -> Self { TokenFlags { - create_json_entry: 0, + create_json_entry: false, is_end_of_object_or_array: 0, is_start_of_object_or_array: 0, new_context: 0, From c29d1793f0a808c98f6785fe21fd250c2526d12c Mon Sep 17 00:00:00 2001 From: zac-williamson Date: Mon, 4 Aug 2025 16:28:32 +0100 Subject: [PATCH 02/13] zac's documentation --- src/_string_tools/slice_packed_field.nr | 2 +- src/json.nr | 328 +++++++++++++++++------- src/token_flags.nr | 25 +- 3 files changed, 255 insertions(+), 100 deletions(-) diff --git
a/src/_string_tools/slice_packed_field.nr b/src/_string_tools/slice_packed_field.nr index 6c206f8..31348da 100644 --- a/src/_string_tools/slice_packed_field.nr +++ b/src/_string_tools/slice_packed_field.nr @@ -883,7 +883,7 @@ mod test { for j in 0..18 { let start_byte: u32 = text.len() - num_bytes - byte_positions[j]; let mut expected_slices: [Field; 3] = - // Safety: this is a test + // Safety: this is a test unsafe { build_slices_for_test(text, start_byte, num_bytes) }; let result_slices: [Field; 3] = slice_fields(slices, start_byte as Field, num_bytes as Field); diff --git a/src/json.nr b/src/json.nr index 7c497a8..af8e2d4 100644 --- a/src/json.nr +++ b/src/json.nr @@ -224,34 +224,62 @@ impl object, context == 1 => array) + // If current token is END_OBJECT_TOKEN or END_ARRAY_TOKEN, set context to the context value in previous_stack_entry + // (i.e. restore the context to whatever the parent of the object/array is) + // Pseudocode: + // if (is_end_of_object_or_array) { + // context = previous_stack_entry.context + // } else { + // context = new_context + // } // 1 gate // if `is_end_of_object_or_array == 1`, `new_context = 0` so we can do something cheaper than a conditional select: // If is_end_of_object_or_array is 1, then new_context is 0, so set context = previous_stack_entry.context @@ -372,6 +486,25 @@ impl Self { let bytes: [u8; 7] = f.to_be_bytes(); let create_json_entry = bytes[0] != 0; @@ -31,6 +51,7 @@ impl TokenFlags { } } + /// Convert a Field element that contains a packed TokenFlags object into a real TokenFlags object pub(crate) fn from_field(f: Field) -> Self { // 10 gates // Safety: check the comments below @@ -55,7 +76,8 @@ impl TokenFlags { r } - // 4 gates + /// Pack a TokenFlags object into a Field element + /// 4 gates pub(crate) fn to_field(self) -> Field { self.preserve_num_entries + self.is_value_token * 0x100 @@ -66,6 +88,7 @@ impl TokenFlags { + self.create_json_entry as Field * 0x1000000000000 } + /// Default constructor pub(crate) fn default() -> Self { TokenFlags { create_json_entry: false, From c51ca3a94a0901f7d889f581b802d2042526e08c Mon Sep 17 00:00:00 2001 From: jialinli Date: Tue, 5 Aug 2025 00:30:10 -0700 Subject: [PATCH 03/13] remove unused function --- src/_table_generation/make_tables.nr | 22 ------------ src/_table_generation/table_generation.md | 44 +++++++++++++++++++++++ 2 files changed, 44 insertions(+), 22 deletions(-) diff --git a/src/_table_generation/make_tables.nr b/src/_table_generation/make_tables.nr index a2a9086..b01dcba 100644 --- a/src/_table_generation/make_tables.nr +++ b/src/_table_generation/make_tables.nr @@ -46,28 +46,6 @@ global CAPTURE_ERROR_FLAG: [[bool; 128]; 4] = [ LITERAL_CAPTURE_ERROR_FLAG, ]; -unconstrained fn make_capture_table_full() -> [[Field; 128]; 4] { - let mut result: [[Field; 128]; 4] = [[0; 128]; 4]; - for i in 0..4 { - for j in 0..128 { - let table = CAPTURE_TABLE[i][j]; - let token = CAPTURE_TOKEN[i][j]; - let push_transcript = CAPTURE_PUSH_TRANSCRIPT[i][j] as Field; - let increase_length = CAPTURE_INCREASE_LENGTH[i][j] as Field; - let error = CAPTURE_ERROR_FLAG[i][j] as Field; - - let full = table - + token as Field * 0x100 - + push_transcript * 0x10000 - + increase_length * 0x1000000 - + error * 0x100000000; - result[i][j] = full; - } - } - - result -} - unconstrained fn make_ascii_to_token_table() -> [Field; 1024] { let mut result: [Field; 256 * 4] = [0; 256 * 4]; for i in 0..4 { diff --git a/src/_table_generation/table_generation.md b/src/_table_generation/table_generation.md index 2bd87fb..e81742b 
100644 --- a/src/_table_generation/table_generation.md +++ b/src/_table_generation/table_generation.md @@ -23,3 +23,47 @@ Maps (escape_flag, scan_mode, ascii) to scanning actions:
 - `push_transcript`: Whether to add the token to the transcript. In grammar mode, true for all structural elements [, {, comma, }, ], :. In string_capture, true for ", which signals the end of the string. In numeric/literal_capture, true for space, \t, \n, \r, ", and comma. Note that the first scan does not fully delimit numerics or literals because we don't know when they end, so we rely on the capture_missing_tokens function.
 - `increase_length`: Whether to extend the current token: always false in grammar_capture; true for 0-9 in numeric_capture, for all characters except " in string_capture, and for the letters of true, false, null in literal_capture
 - `is_potential_escape_sequence`: true if the current character is \ (backslash) in string_capture mode
+
+## Other tables
+While TOKEN_FLAGS_TABLE and JSON_CAPTURE_TABLE are the most important tables, they are built from foundational hardcoded tables in make_tables_subtables.nr:
+
+GRAMMAR_CAPTURE_TABLE: State transition table for grammar scan mode. Each entry specifies the next scan mode (GRAMMAR_CAPTURE, STRING_CAPTURE, NUMERIC_CAPTURE, LITERAL_CAPTURE, or ERROR_CAPTURE) based on the encountered ASCII character. For example, "f" is mapped to LITERAL_CAPTURE because it indicates the start of the literal false.
+STRING_CAPTURE_TABLE
+NUMERIC_CAPTURE_TABLE
+LITERAL_CAPTURE_TABLE
+
+GRAMMAR_CAPTURE_TOKEN: Maps characters in grammar mode to token types. Converts ASCII characters into the appropriate JSON token types for structural elements, values, and literals.
+- Structural characters ({, }, [, ], ,, :) → their respective structural tokens
+- Quote (") → STRING_TOKEN (start of string)
+- Digits (0-9) → NUMERIC_TOKEN (start of number)
+- Literal starters (f, t, n) → LITERAL_TOKEN (start of true/false/null)
+- Invalid characters → NO_TOKEN or error handling
+STRING_CAPTURE_TOKEN
+NUMERIC_CAPTURE_TOKEN
+LITERAL_CAPTURE_TOKEN
+
+STRING_CAPTURE_PUSH_TRANSCRIPT: Determines when to add tokens to the transcript while scanning inside a string. Only true for the closing quote ("). This signals the end of the string and triggers token creation. All other characters within the string (letters, numbers, punctuation, spaces) are false because they extend the current string token rather than creating new tokens.
+
+GRAMMAR_CAPTURE_PUSH_TRANSCRIPT: Determines when to add tokens to the transcript while scanning in grammar mode. True for the following characters:
+- Comma (,) → true (value separator)
+- Colon (:) → true (key-value separator)
+- All other characters → false (including digits, quotes, and literal starters)
+
+NUMERIC_CAPTURE_PUSH_TRANSCRIPT: Determines when to add the current numeric token to the transcript while scanning a number. True for the following characters:
+- Whitespace (space, tab, newline, carriage return) → true (end number)
+- Quote (") → true (end number, followed by string)
+- Comma (,) → true (end number, followed by next value)
+- All other characters → false (extend current number or error)
+
+LITERAL_CAPTURE_PUSH_TRANSCRIPT: Determines when to add the current literal token (true/false/null) to the transcript while scanning a literal. True for any grammar character: , [ ] { } " space tab newline. (This is only used in the first scan; in the second step, capture_missing_tokens, we are able to separate the literal from the value separator.)
+
+GRAMMAR_CAPTURE_INCREASE_LENGTH: Determines when to extend the current token length while scanning in grammar mode. True for digits (0-9), which start a numeric scan, and for the literal letters (f, t, n, r, u, e, a, l, s), which start a literal scan. For structural tokens we don't track a length (it is always 1). For string tokens we expect to see a " before seeing any letters.
+
+STRING_CAPTURE_INCREASE_LENGTH: Determines when to extend the current string token while scanning inside a string. True for all printable characters except the quote (which ends the string)
+NUMERIC_CAPTURE_INCREASE_LENGTH: True for 0-9
+LITERAL_CAPTURE_INCREASE_LENGTH: True for t, r, u, e, f, a, l, s, n
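+
+As a concrete illustration of how one of these sub-tables could be built, here is a sketch in the spirit of GRAMMAR_CAPTURE_TABLE, following the transition rules described above. The scan-mode constants and the table shape are assumptions for illustration; see make_tables_subtables.nr for the real definitions:
+
+```rust
+// Assumed scan-mode encodings, for illustration only
+global GRAMMAR_CAPTURE: Field = 0;
+global STRING_CAPTURE: Field = 1;
+global NUMERIC_CAPTURE: Field = 2;
+global LITERAL_CAPTURE: Field = 3;
+global ERROR_CAPTURE: Field = 4;
+
+// Map an ASCII byte seen in grammar mode to the next scan mode.
+unconstrained fn make_grammar_capture_table() -> [Field; 128] {
+    let mut table: [Field; 128] = [ERROR_CAPTURE; 128];
+    // whitespace and structural characters stay in grammar mode
+    table[9] = GRAMMAR_CAPTURE; // tab
+    table[10] = GRAMMAR_CAPTURE; // newline
+    table[13] = GRAMMAR_CAPTURE; // carriage return
+    table[32] = GRAMMAR_CAPTURE; // space
+    table[44] = GRAMMAR_CAPTURE; // ','
+    table[58] = GRAMMAR_CAPTURE; // ':'
+    table[91] = GRAMMAR_CAPTURE; // '['
+    table[93] = GRAMMAR_CAPTURE; // ']'
+    table[123] = GRAMMAR_CAPTURE; // '{'
+    table[125] = GRAMMAR_CAPTURE; // '}'
+    table[34] = STRING_CAPTURE; // '"' begins a string
+    for digit in 48..58 {
+        table[digit] = NUMERIC_CAPTURE; // '0'..'9' begin a number
+    }
+    table[102] = LITERAL_CAPTURE; // 'f' begins "false"
+    table[116] = LITERAL_CAPTURE; // 't' begins "true"
+    table[110] = LITERAL_CAPTURE; // 'n' begins "null"
+    table
+}
+```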
+
+GRAMMAR_CAPTURE_ERROR_FLAG
+STRING_CAPTURE_ERROR_FLAG
+NUMERIC_CAPTURE_ERROR_FLAG
+LITERAL_CAPTURE_ERROR_FLAG
\ No newline at end of file
From 5cb2b530bcc2d977c7d0cb9ee933a70476eedea5 Mon Sep 17 00:00:00 2001 From: jialinli Date: Tue, 5 Aug 2025 00:50:42 -0700 Subject: [PATCH 04/13] lint --- src/_string_tools/slice_packed_field.nr | 2 +- src/json.nr | 31 +++++++++---------------- 2 files changed, 12 insertions(+), 21 deletions(-) diff --git a/src/_string_tools/slice_packed_field.nr b/src/_string_tools/slice_packed_field.nr index 31348da..6c206f8 100644 --- a/src/_string_tools/slice_packed_field.nr +++ b/src/_string_tools/slice_packed_field.nr @@ -883,7 +883,7 @@ mod test { for j in 0..18 { let start_byte: u32 = text.len() - num_bytes - byte_positions[j]; let mut expected_slices: [Field; 3] = - // Safety: this is a test + // Safety: this is a test unsafe { build_slices_for_test(text, start_byte, num_bytes) }; let result_slices: [Field; 3] = slice_fields(slices, start_byte as Field, num_bytes as Field); diff --git a/src/json.nr b/src/json.nr index af8e2d4..833ef1b 100644 --- a/src/json.nr +++ b/src/json.nr @@ -308,14 +308,13 @@ impl Date: Tue, 5 Aug 2025 00:56:56 -0700 Subject: [PATCH 05/13] remove unused function lt_field_8_bit --- README.md | 37 +++++++++++++++++++++++ src/_comparison_tools/lt.nr | 12 -------- src/_table_generation/table_generation.md | 4 ++- src/json.nr | 26 +++++++++++++--- 4 files changed, 61 insertions(+), 18 deletions(-) diff --git a/README.md b/README.md index dcacbb7..088402d 100644 --- a/README.md +++ b/README.md @@ -142,6 +142,43 @@ The parser uses several lookup tables generated from `src/_table_generation/`: - `JSON_CAPTURE_TABLE`: Character-by-character parsing rules - `TOKEN_VALIDATION_TABLE`: JSON grammar validation
+### Example walkthrough
+Let's walk through how the raw JSON text {"name": "Alice", "age": 30} is parsed.
+First, the parser reads the JSON one character at a time and uses lookup tables to decide what to do with each character. For the simplified object {"name": "Alice"}:
+Character: { → "Start scanning an object (grammar_capture)"
+Character: " → "Start scanning a string"
+Character: n → "Continue scanning the string"
+Character: a → "Continue scanning the string"
+Character: m → "Continue scanning the string"
+Character: e → "Continue scanning the string"
+Character: " → "End the string"
+Character: : → "Key-value separator"
+Character: " → "Start scanning a string"
+Character: A → "Continue scanning the string"
+Character: l → "Continue scanning the string"
+Character: i → "Continue scanning the string"
+Character: c → "Continue scanning the string"
+Character: e → "Continue scanning the string"
+Character: " → "End the string"
+Character: } → "End the object"
+
+The parser builds a list of "tokens", the basic building blocks of the JSON, which becomes:
+1. BEGIN_OBJECT_TOKEN ({)
+2. STRING_TOKEN ("name")
+3. KEY_SEPARATOR_TOKEN (:)
+4. STRING_TOKEN ("Alice")
+5. END_OBJECT_TOKEN (})
+
+The parser then converts tokens into structured entries with parent-child relationships.
+Each entry knows:
+- What type it is (object, string, number, etc.)
+- Who its parent is
+- How many children it has
+- Where it is in the original JSON
+
+Finally, the parser sorts entries by their key hashes for fast lookups.
+Original order: [{"name": "Alice"}, {"age": 30}]
+Sorted order: [{"age": 30}, {"name": "Alice"}]
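+
+To make the walkthrough concrete, here is a minimal parse call in the style of this library's own tests (the generic parameters here are sized loosely for this input; they follow the same pattern as the tests in src/json.nr):
+
+```rust
+#[test]
+fn test_walkthrough_example() {
+    let text = "{\"name\": \"Alice\", \"age\": 30}";
+    let _: JSON<30, 10, 20, 20, 2> = JSON::parse_json_from_string(text);
+}
+```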
 # Acknowledgements
diff --git a/src/_comparison_tools/lt.nr b/src/_comparison_tools/lt.nr index 0ba5739..ab97f6e 100644 --- a/src/_comparison_tools/lt.nr +++ b/src/_comparison_tools/lt.nr @@ -54,18 +54,6 @@ pub fn lt_field_16_bit(x: Field, y: Field) -> bool { predicate } -pub fn lt_field_8_bit(x: Field, y: Field) -> bool { - // Safety: check the comments below - let predicate = unsafe { get_lt_predicate_f(x, y) }; - let delta = y as Field - x as Field; - let lt_parameter = 2 * (predicate as Field) * delta - predicate as Field - delta; - // checks that the bit length of lt_parameter is 8 - // i.e. checks the sign of lt_parameter - lt_parameter.assert_max_bit_size::<8>(); - - predicate -} - pub fn assert_gt_240_bit(lhs: Field, rhs: Field) { // lhs > rhs // -> lhs - rhs > 0 diff --git a/src/_table_generation/table_generation.md b/src/_table_generation/table_generation.md index e81742b..a3b7753 100644 --- a/src/_table_generation/table_generation.md +++ b/src/_table_generation/table_generation.md @@ -66,4 +66,6 @@ LITERAL_CAPTURE_INCREASE_LENGTH: True for t, r, u, e, f, a, l, s, n
 GRAMMAR_CAPTURE_ERROR_FLAG
 STRING_CAPTURE_ERROR_FLAG
 NUMERIC_CAPTURE_ERROR_FLAG
-LITERAL_CAPTURE_ERROR_FLAG
\ No newline at end of file
+LITERAL_CAPTURE_ERROR_FLAG
+
+PROCESS_RAW_TRANSCRIPT_TABLE: This table is used to post-process the raw transcript and add missing grammar tokens that were not captured during the initial scanning in build_transcript. Input: the encoded_ascii of the last token in each entry (scan_mode + ascii character). Output: a packed value containing: token, the token type for this entry; new_grammar, whether to add a missing grammar token; and scan_token, the type of grammar token to add (if needed), such as END_OBJECT_TOKEN (}) or VALUE_SEPARATOR_TOKEN (comma).
\ No newline at end of file
diff --git a/src/json.nr b/src/json.nr index 833ef1b..492d80b 100644 --- a/src/json.nr +++ b/src/json.nr @@ -116,6 +116,7 @@ impl [Field; MaxNumTokens] { let mut raw_transcript: [Field; MaxNumTokens] = [0; MaxNumTokens]; let mut transcript_ptr: u32 = 0; + // We start in grammar capture mode, expecting to see a { or [.
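+    // Each subsequent loop iteration combines (escape flag, scan mode, ascii)
+    // into a lookup into JSON_CAPTURE_TABLE to decide whether to extend the
+    // current token or push a completed token onto the transcript
+    // (see src/_table_generation/table_generation.md).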
let mut scan_mode = GRAMMAR_CAPTURE; let mut length: Field = 0; let mut previous_was_potential_escape_sequence = 0; for i in 0..NumBytes { - // while this assert is in an unconstrained function, the out of bounds accesss `raw_transcript[transcript_ptr]` in build_transcript also generates failing constraints + // while this assert is in an unconstrained function, the out of bounds access `raw_transcript[transcript_ptr]` in build_transcript also generates failing constraints assert(transcript_ptr < MaxNumTokens, "build_transcript: MaxNumTokens limit exceeded!"); let ascii = self.json[i]; @@ -552,18 +556,23 @@ impl(); + scan_mode.assert_max_bit_size::<2>(); JSON { json: self.json, @@ -689,19 +699,24 @@ impl Date: Wed, 6 Aug 2025 00:36:31 -0700 Subject: [PATCH 06/13] add literal validations --- src/json.nr | 57 ++++++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 50 insertions(+), 7 deletions(-) diff --git a/src/json.nr b/src/json.nr index 492d80b..4c1aea1 100644 --- a/src/json.nr +++ b/src/json.nr @@ -759,6 +759,32 @@ impl(); + if length == 5 { + let is_false = (self.json[index_as_u32] == 102) // 'f' + & (self.json[index_as_u32 + 1] == 97) // 'a' + & (self.json[index_as_u32 + 2] == 108) // 'l' + & (self.json[index_as_u32 + 3] == 115) // 's' + & (self.json[index_as_u32 + 4] == 101); // 'e' + assert(is_false, "invalid literal"); + } else if length == 4 { + let is_true = (self.json[index_as_u32] == 116) // 't' + & (self.json[index_as_u32 + 1] == 114) // 'r' + & (self.json[index_as_u32 + 2] == 117) // 'u' + & (self.json[index_as_u32 + 3] == 101); // 'e' + + let is_null = (self.json[index_as_u32] == 110) // 'n' + & (self.json[index_as_u32 + 1] == 117) // 'u' + & (self.json[index_as_u32 + 2] == 108) // 'l' + & (self.json[index_as_u32 + 3] == 108); // 'l' + assert(is_null | is_true, "invalid literal"); + } else { + assert(false, "invalid literal"); + } + } // 2 gates let diff = updated_transcript[cast_num_to_u32(transcript_ptr)] - entry; std::as_witness(diff); @@ -1075,13 +1101,6 @@ fn test_json_char_outside_of_string_fails() { let _: JSON<26, 10, 20, 20, 2> = JSON::parse_json_from_string(text); } -#[test(should_fail_with = "ValidationFlags: grammar error")] -fn test_json_char_outside_of_string_fails_2() { - // n could be the start of the literal "null", so this passes the ScanData check but fails ValidationFlags - let text = "{ \"hello \", \"world\" n}"; - let _: JSON<26, 10, 20, 20, 2> = JSON::parse_json_from_string(text); -} - #[test(should_fail_with = "ValidationFlags: grammar error")] fn test_json_array_with_invalid_tokens_fails() { // n could be the start of the literal "null", so this passes the ScanData check but fails ValidationFlags @@ -1133,3 +1152,27 @@ fn key_is_not_a_key() { let json_string = "{1\n:0}"; let _: JSON<26, 10, 20, 20, 2> = JSON::parse_json_from_string(json_string); } + +#[test(should_fail_with = "invalid literal")] +fn test_invalid_literal() { + let text = "{ \"name\":fal }"; + let _: JSON<153, 10, 60, 60, 2> = JSON::parse_json_from_string(text); +} + +#[test(should_fail_with = "invalid literal")] +fn test_invalid_literal_2() { + let text = "{ \"name\":treu}"; + let _: JSON<153, 10, 60, 60, 2> = JSON::parse_json_from_string(text); +} + +#[test(should_fail_with = "invalid literal")] +fn test_invalid_literal_3() { + let text = "{ \"name\":truea }"; + let _: JSON<153, 10, 60, 60, 2> = JSON::parse_json_from_string(text); +} + +#[test(should_fail_with = "invalid literal")] +fn test_invalid_literal_4() { + let text = "{ \"hello \", \"world\" n}"; + let _: 
JSON<26, 10, 20, 20, 2> = JSON::parse_json_from_string(text); +} From 61918a131c899b7900ce9185eacf84e5d0189299 Mon Sep 17 00:00:00 2001 From: Tom French <15848336+TomAFrench@users.noreply.github.com> Date: Wed, 6 Aug 2025 18:44:51 +0000 Subject: [PATCH 07/13] chore: fix broken merge --- src/json.nr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/json.nr b/src/json.nr index 43e218e..f4930a4 100644 --- a/src/json.nr +++ b/src/json.nr @@ -326,7 +326,7 @@ impl Date: Wed, 6 Aug 2025 11:45:56 -0700 Subject: [PATCH 08/13] merge conflict --- src/json.nr | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/src/json.nr b/src/json.nr index 43e218e..6607024 100644 --- a/src/json.nr +++ b/src/json.nr @@ -326,7 +326,6 @@ impl Date: Wed, 6 Aug 2025 12:45:22 -0700 Subject: [PATCH 09/13] remove unnecessary code --- src/json.nr | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) diff --git a/src/json.nr b/src/json.nr index 3d94ba3..7634e0b 100644 --- a/src/json.nr +++ b/src/json.nr @@ -361,10 +361,7 @@ impl Date: Thu, 7 Aug 2025 18:44:39 -0700 Subject: [PATCH 10/13] merge conflict --- src/json.nr | 4 ++-- src/keymap.nr | 27 --------------------------- 2 files changed, 2 insertions(+), 29 deletions(-) diff --git a/src/json.nr b/src/json.nr index 20923c4..43d21f2 100644 --- a/src/json.nr +++ b/src/json.nr @@ -126,7 +126,7 @@ impl>>>>>> 6d190ca4391e4682a576af7666a99885a39ada4b for i in 0..MaxNumValues { // 11.75 + 3.5 = 15.25 gates per iteration let (id, parent_index, entry_type) = JSONEntry::extract_entry_type_id_and_parent_index_from_field( sorted_entries[i].value, ); -<<<<<<< HEAD - parent_indices[i] = parent_index; -======= parent_indices[i] = cast_num_to_u32(parent_index); ->>>>>>> 6d190ca4391e4682a576af7666a99885a39ada4b // 2 gates // update is 1 for end of object/array, 0 for other let update = TOKEN_ENDS_OBJECT_OR_ARRAY[cast_num_to_u32(entry_type)]; // NOTE THIS RELIES ON MaxNumValues ACTUALLY DESCRIBING NUMMaxNumValues + 1 -<<<<<<< HEAD - // index = id if update = 1, else MaxNumValues -1 - // 1 gate - let index = (id - (MaxNumValues as Field - 1)) * update + (MaxNumValues as Field - 1); -======= let index = if update { cast_num_to_u32(id) } else { MaxNumValues - 1 }; ->>>>>>> 6d190ca4391e4682a576af7666a99885a39ada4b // 3.5 gates identity_to_json_map[index] = i; } @@ -121,16 +105,9 @@ impl parent_identity_pre as u32) as Field; - let new_parent = lt_field_16_bit(parent_identity_pre, parent_identity_post) as Field; -======= // n.b. 
parent_identity_post - parent_identity_pre is not necessarily 0 or 1 (can be larger) // due to empty objects and arrays increasing identity value without creating associated child json entries let new_parent = parent_identity_pre < parent_identity_post; ->>>>>>> 6d190ca4391e4682a576af7666a99885a39ada4b // 3.5 gates let index_of_parent = identity_to_json_map[parent_identity_post]; // 1 gate + 3.5 gates @@ -144,11 +121,7 @@ impl>>>>>> 6d190ca4391e4682a576af7666a99885a39ada4b parent_identity_pre = parent_identity_post; } From 78c67ffc3367de037996cc8e701c6f4952f88e35 Mon Sep 17 00:00:00 2001 From: jialinli Date: Thu, 7 Aug 2025 19:15:54 -0700 Subject: [PATCH 11/13] format --- src/json.nr | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/src/json.nr b/src/json.nr index 43d21f2..6beb106 100644 --- a/src/json.nr +++ b/src/json.nr @@ -643,8 +643,9 @@ impl Date: Thu, 7 Aug 2025 19:15:54 -0700 Subject: [PATCH 12/13] format --- src/json.nr | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) diff --git a/src/json.nr b/src/json.nr index 43d21f2..7471f02 100644 --- a/src/json.nr +++ b/src/json.nr @@ -431,10 +431,10 @@ impl `preserve_num_entries = 0` (start new object/array) + // - When `is_end_of_object_or_array = 1` -> `preserve_num_entries = 0` (end object/array) + // - When `preserve_num_entries = 1` -> both flags = 0 (normal token) + // // 4 gates { let old = current_identity_value; @@ -643,8 +648,9 @@ impl Date: Wed, 13 Aug 2025 09:09:07 -0700 Subject: [PATCH 13/13] lint --- src/json.nr | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/json.nr b/src/json.nr index d66d50b..754165e 100644 --- a/src/json.nr +++ b/src/json.nr @@ -449,7 +449,7 @@ impl `preserve_num_entries = 0` (start new object/array) - // - When `is_end_of_object_or_array = 1` -> `preserve_num_entries = 0` (end object/array) + // - When `is_end_of_object_or_array = 1` -> `preserve_num_entries = 0` (end object/array) // - When `preserve_num_entries = 1` -> both flags = 0 (normal token) // // 4 gates
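
As a footnote to the flag invariants documented in the last two patches above, here is a minimal sketch (not the library's code) of how mutually exclusive binary flags can be constrained in-circuit, in the same style as the binary checks in `TokenFlags::from_field`:

```rust
// Constrain three flags to be binary and mutually exclusive, mirroring the
// invariant described above: when preserve_num_entries = 1, both the
// start-of-object and end-of-object flags must be 0 (and vice versa).
fn assert_flags_consistent(is_start: Field, is_end: Field, preserve: Field) {
    // x * x == x forces x to be 0 or 1
    assert(is_start * is_start == is_start);
    assert(is_end * is_end == is_end);
    assert(preserve * preserve == preserve);
    // a product of two binary flags is 0 iff they are never both 1
    assert(is_start * is_end == 0);
    assert(is_start * preserve == 0);
    assert(is_end * preserve == 0);
}
```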