[Fix] Fix the efficiency issue in repetition structure. (#467)

Seven-Streams · web-flow · commit 07e978f701de · 2025-11-07T15:34:20.000+08:00
As reported in #465, this PR fixes the efficiency issue in the repetition structure.

The issue has two causes, both in the repetition construction method:

- The threshold is too small.
- The construction of the remaining part (everything except the repetition expression) is not good enough.

This PR fixes the problem. The efficiency can be evaluated with the following code:

```python
import xgrammar as xgr
from transformers import AutoTokenizer
from xgrammar.testing import _get_matcher_from_grammar_and_tokenizer_info
import time

schema = {
    "additionalProperties": False,
    "properties": {
        "description": {
            "maxLength": 200,
            "minLength": 10,
            "title": "Some description",
            "type": "string"
        }
    },
    "required": [
        "description"
    ],
    "title": "Some object",
    "type": "object"
}

tokenizer_path = "meta-llama/Meta-Llama-3-8B-Instruct"
tokenizer = AutoTokenizer.from_pretrained(tokenizer_path, use_fast=True, trust_remote_code=True)
tokenizer_info = xgr.TokenizerInfo.from_huggingface(tokenizer)
grammar = xgr.Grammar.from_json_schema(schema)
matcher = _get_matcher_from_grammar_and_tokenizer_info(grammar, tokenizer_info)
test_str = "{\"description\": \"1234567890123456789101234567891012345678910123456789101234567891" + \
    "012345678910123456789101234567891012345678910123456789101234567891012345678910123456" + \
    "78910123456789101234567897891012345678910123456789\"}"
token_bitmask = xgr.allocate_token_bitmask(1, tokenizer_info.vocab_size)
tpot = []
print(grammar)
for char in test_str:
    start_time = time.time_ns()
    matcher.fill_next_token_bitmask(token_bitmask)
    assert matcher.accept_string(char)
    end_time = time.time_ns()
    print(f"Processed char '{char}' in {(end_time - start_time)/1e3} us")
    tpot.append((end_time - start_time)/1e3)
tpot.sort()
print(f"avg: {sum(tpot) / len(tpot)} us")
print(f"max: {tpot[-1]} us")
print(f"p50: {tpot[int(0.5 * len(tpot))]} us")
print(f"p90: {tpot[int(0.9 * len(tpot))]} us")
```

And the results are listed below:

```
v0.1.21
avg: 544.5573364055299 us
max: 6720.648 us
p50: 332.969 us
p90: 620.647 us

main branch
avg: 4789.501852534562 us
max: 7862.335 us
p50: 5813.238 us
p90: 6398.427 us

This PR
avg: 353.5097096774194 us
max: 1064.672 us
p50: 449.098 us
p90: 664.901 us
```

---------

Signed-off-by: Yuchuan <yuchuan.7streams@gmail.com>
diff --git a/cpp/grammar_impl.h b/cpp/grammar_impl.h
@@ -125,7 +125,7 @@ class Grammar::Impl {
     // tag_expr should be a byte string, and rule_id should be a rule id.
     // loop_after_dispatch is a bool.
     kTagDispatch,
-    // data format: [grammar_expr_id, min_repeat_count, max_repeat_count]
+    // data format: [rule_id, min_repeat_count, max_repeat_count]
     kRepeat,
   };
 
diff --git a/cpp/grammar_parser.cc b/cpp/grammar_parser.cc
@@ -494,6 +494,7 @@ class EBNFParser {
   int32_t HandlePlusQuantifier(int32_t grammar_expr_id);
   int32_t HandleQuestionQuantifier(int32_t grammar_expr_id);
   int32_t HandleRepetitionRange(int32_t grammar_expr_id, int64_t lower, int64_t upper);
+  int32_t LegacyHandleRepetitionRange(int32_t grammar_expr_id, int64_t lower, int64_t upper);
 
   // When parsing, we first find the names of all rules, and build the mapping from name to rule id.
   void InitRuleNames();
@@ -757,71 +758,84 @@ int32_t EBNFParser::HandleQuestionQuantifier(int32_t grammar_expr_id) {
   return builder_.AddRuleRef(new_rule_id);
 }
 
+int32_t EBNFParser::LegacyHandleRepetitionRange(
+    int32_t grammar_expr_id, int64_t lower, int64_t upper
+) {
+  // Mandatory prefix shared by all cases: expr expr ... expr (l = lower times)
+
+  std::vector<int32_t> elements;
+  for (int64_t i = 0; i < lower; ++i) {
+    elements.push_back(grammar_expr_id);
+  }
+
+  // Case 1: {l} (upper == lower):
+  // expr expr ... expr (l times); for l == 0 the sequence is empty
+  if (upper == lower) {
+    return builder_.AddSequence(elements);
+  }
+
+  // Case 2: {l,} (upper == -1 encodes "no upper bound"):
+  // expr expr ... expr (l times) rest
+  // rest ::= "" | expr rest  (one right-recursive rule)
+  if (upper == -1) {
+    auto new_rule_name = builder_.GetNewRuleName(cur_rule_name_);
+    auto new_rule_id = builder_.AddEmptyRule(new_rule_name);
+    auto ref_to_new_rule = builder_.AddRuleRef(new_rule_id);
+    auto new_grammar_expr_id = builder_.AddChoices(
+        {builder_.AddEmptyStr(), builder_.AddSequence({grammar_expr_id, ref_to_new_rule})}
+    );
+    builder_.UpdateRuleBody(new_rule_id, new_grammar_expr_id);
+    elements.push_back(builder_.AddRuleRef(new_rule_id));
+    return builder_.AddSequence(elements);
+  }
+
+  // Case 3: {l, r} (r - l >= 1; this function does not check it, the caller must ensure it)
+  // expr expr ... expr (l times) rest1
+  // rest1 ::= "" | expr rest2
+  // rest2 ::= "" | expr rest3
+  // ...
+  // rest(r - l) ::= "" | expr  (last link: no successor, so at most r repetitions)
+  std::vector<int32_t> rest_rule_ids;
+
+  for (int64_t i = 0; i < upper - lower; ++i) {
+    auto new_rule_name = builder_.GetNewRuleName(cur_rule_name_);
+    rest_rule_ids.push_back(builder_.AddEmptyRule(new_rule_name));
+  }
+  for (int64_t i = 0; i < upper - lower - 1; ++i) {
+    auto ref_to_next_rule = builder_.AddRuleRef(rest_rule_ids[i + 1]);
+    auto new_grammar_expr_id = builder_.AddChoices(
+        {builder_.AddEmptyStr(), builder_.AddSequence({grammar_expr_id, ref_to_next_rule})}
+    );
+    builder_.UpdateRuleBody(rest_rule_ids[i], new_grammar_expr_id);
+  }
+  auto last_grammar_expr_id = builder_.AddChoices({builder_.AddEmptyStr(), grammar_expr_id});
+  builder_.UpdateRuleBody(rest_rule_ids.back(), last_grammar_expr_id);
+
+  elements.push_back(builder_.AddRuleRef(rest_rule_ids[0]));
+  return builder_.AddSequence(elements);
+}
+
 int32_t EBNFParser::HandleRepetitionRange(
     const int32_t grammar_expr_id, int64_t lower, int64_t upper
 ) {
-  static const int64_t kUnzipThreshold = 15;
+  static const int64_t kUnzipThreshold = 128;
   XGRAMMAR_DCHECK(lower >= 0);
   XGRAMMAR_DCHECK(upper == -1 || upper >= lower);
-  // Case 1. small (<=threshold), unzip the repetition.
-  if (upper != -1 && upper <= kUnzipThreshold) {
-    std::vector<int32_t> choices;
-    if (lower == 0) {
-      choices.push_back(builder_.AddEmptyStr());
-      lower = 1;  // We have already handled the empty string case.
-    }
-    for (int64_t count = lower; count <= upper; ++count) {
-      std::vector<int32_t> sequence;
-      for (int64_t i = 0; i < count; ++i) {
-        sequence.push_back(grammar_expr_id);
-      }
-      choices.push_back(builder_.AddSequence(sequence));
-    }
-    return builder_.AddChoices(choices);
+  // Case 1.1 small upper (<=threshold), unzip the repetition.
+  // Case 1.2 unbounded upper, and lower is also small (<=threshold), unzip the lower part.
+  if ((upper != -1 && upper <= kUnzipThreshold) || (upper == -1 && lower <= kUnzipThreshold)) {
+    return LegacyHandleRepetitionRange(grammar_expr_id, lower, upper);
   }
 
-  // Case 2. upper is unbounded or large.
+  // Case 2. upper is unbounded, and lower is large (>threshold).
+  // Or upper is bounded, but upper > threshold.
 
   // Case 2.1.1. lower is smaller than threshold, and upper is large. Transform {lower, upper} into:
-  // {threshold, upper} | {lower} | ... | {threshold}.
-  // Case 2.1.2. lower is smaller than threshold, and upper is unbounded. Unzip the
-  // {lower} repetition, and add a star expression.
+  // {threshold, upper} | {lower, threshold - 1}
   std::vector<int32_t> choices;
   if (lower < kUnzipThreshold) {
-    if (upper == -1) {
-      int infinite_repetition_id = -1;
-      const auto& rule_expr = builder_.GetGrammarExpr(grammar_expr_id);
-      if (rule_expr.type == GrammarBuilder::GrammarExprType::kCharacterClass) {
-        std::vector<GrammarBuilder::CharacterClassElement> character_ranges;
-        bool is_negative = rule_expr[0];
-        for (int i = 1; i < static_cast<int>(rule_expr.size()); i += 2) {
-          character_ranges.push_back({rule_expr[i], rule_expr[i + 1]});
-        }
-        infinite_repetition_id = builder_.AddCharacterClassStar(character_ranges, is_negative);
-      } else {
-        const auto& unbounded_rule_id =
-            builder_.AddEmptyRule(builder_.GetNewRuleName(cur_rule_name_ + "_repeat_inf"));
-        int recursion_sequence =
-            builder_.AddSequence({grammar_expr_id, builder_.AddRuleRef(unbounded_rule_id)});
-        int recursion_choice = builder_.AddChoices({builder_.AddEmptyStr(), recursion_sequence});
-        builder_.UpdateRuleBody(unbounded_rule_id, recursion_choice);
-        infinite_repetition_id = builder_.AddRuleRef(unbounded_rule_id);
-      }
-      std::vector<int32_t> sequence(lower, grammar_expr_id);
-      sequence.push_back(infinite_repetition_id);
-      return builder_.AddSequence(sequence);
-    }
-    if (lower == 0) {
-      choices.push_back(builder_.AddEmptyStr());
-      lower = 1;
-    }
-    for (; lower < kUnzipThreshold; ++lower) {
-      std::vector<int32_t> sequence;
-      for (int64_t i = 0; i < lower; ++i) {
-        sequence.push_back(grammar_expr_id);
-      }
-      choices.push_back(builder_.AddSequence(sequence));
-    }
+    choices.push_back(LegacyHandleRepetitionRange(grammar_expr_id, lower, kUnzipThreshold - 1));
+    lower = kUnzipThreshold;
   }
 
   std::optional<int32_t> infinite_repetition_id = std::nullopt;
diff --git a/tests/python/test_grammar_parser.py b/tests/python/test_grammar_parser.py
@@ -146,7 +146,7 @@ def test_repetition_range_exact():
     """Test repetition range with exact count {n}."""
     before = """root ::= "a"{3}
 """
-    expected = """root ::= (((("a" "a" "a"))))
+    expected = """root ::= ((("a" "a" "a")))
 """
     grammar = _ebnf_to_grammar_no_normalization(before)
     after = str(grammar)
@@ -157,7 +157,9 @@ def test_repetition_range_min_max():
     """Test repetition range with min and max {n,m}."""
     before = """root ::= "a"{2,4}
 """
-    expected = """root ::= (((("a" "a") | ("a" "a" "a") | ("a" "a" "a" "a"))))
+    expected = """root ::= ((("a" "a" root_1)))
+root_1 ::= ("" | ("a" root_2))
+root_2 ::= ("" | "a")
 """
     grammar = _ebnf_to_grammar_no_normalization(before)
     after = str(grammar)
@@ -168,8 +170,8 @@ def test_repetition_range_min_only():
     """Test repetition range with only min {n,}."""
     before = """root ::= "a"{2,}
 """
-    expected = """root ::= ((("a" "a" root_repeat_inf)))
-root_repeat_inf ::= ("" | ("a" root_repeat_inf))
+    expected = """root ::= ((("a" "a" root_1)))
+root_1 ::= ("" | ("a" root_1))
 """
     grammar = _ebnf_to_grammar_no_normalization(before)
     after = str(grammar)
@@ -264,9 +266,11 @@ def test_combined_features():
 rule2 ::= [0-9]+ "." [0-9]*
 """
     expected = """root ::= (("start" root_1 "end"))
-rule1 ::= (((([a-z]) | ([a-z] [a-z]) | ([a-z] [a-z] [a-z])))) (=((":")))
+rule1 ::= ((([a-z] rule1_1))) (=((":")))
 rule2 ::= ((rule2_1 "." [0-9]*))
 root_1 ::= ((((rule1) | (rule2)) root_1) | ((rule1) | (rule2)))
+rule1_1 ::= ("" | ([a-z] rule1_2))
+rule1_2 ::= ("" | [a-z])
 rule2_1 ::= (([0-9] rule2_1) | [0-9])
 """
     grammar = _ebnf_to_grammar_no_normalization(before)
@@ -338,29 +342,26 @@ def test_repetition_range():
 """
 
     expected = """root ::= ((a b c d e f g))
-a ::= (("a") | ("a" "a"))
-b ::= ((a) | ("b") | (b_1 b_2) | (b_3 b_4 b_5) | (b_6 b_7 b_8 b_9) | (b_10 b_11 b_12 b_13 b_14))
-c ::= ("" | ("c") | ("c" "c"))
-d ::= ((d_repeat_inf))
-e ::= (("e" "e" e_repeat_inf))
+a ::= (("a" a_1))
+b ::= ((b_5 b_1))
+c ::= ((c_1))
+d ::= ((d_1))
+e ::= (("e" "e" e_1))
 f ::= (("f" "f" "f"))
-g ::= ("")
-d_repeat_inf ::= ("" | ("d" d_repeat_inf))
-e_repeat_inf ::= ("" | ("e" e_repeat_inf))
-b_1 ::= ((a) | ("b"))
-b_2 ::= ((a) | ("b"))
-b_3 ::= ((a) | ("b"))
-b_4 ::= ((a) | ("b"))
+g ::= (())
+a_1 ::= ("" | ("a"))
+b_1 ::= ("" | (b_1_1 b_2))
+b_2 ::= ("" | (b_2_1 b_3))
+b_3 ::= ("" | (b_3_1 b_4))
+b_4 ::= ("" | (a) | ("b"))
+c_1 ::= ("" | ("c" c_2))
+c_2 ::= ("" | ("c"))
+d_1 ::= ("" | ("d" d_1))
+e_1 ::= ("" | ("e" e_1))
 b_5 ::= ((a) | ("b"))
-b_6 ::= ((a) | ("b"))
-b_7 ::= ((a) | ("b"))
-b_8 ::= ((a) | ("b"))
-b_9 ::= ((a) | ("b"))
-b_10 ::= ((a) | ("b"))
-b_11 ::= ((a) | ("b"))
-b_12 ::= ((a) | ("b"))
-b_13 ::= ((a) | ("b"))
-b_14 ::= ((a) | ("b"))
+b_1_1 ::= ((a) | ("b"))
+b_2_1 ::= ((a) | ("b"))
+b_3_1 ::= ((a) | ("b"))
 """
 
     grammar = _ebnf_to_grammar_no_normalization(before)
@@ -776,17 +777,17 @@ def test_error_consecutive_quantifiers():
 
 def test_repetition_normalizer():
     """Test the repetition normalizer. If the context is nullable, then the min repetition time will be reduced to 0."""
-    before = "root ::= ([0-9]*){100, 1000}"
-    expected_grammar = r"""root ::= ((root_repeat_1{0, 985} [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]*))
-root_repeat_1 ::= (([0-9]*)) (=([0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]*))
+    before = "root ::= ([0-9]*){200, 1000}"
+    expected_grammar = r"""root ::= ((root_repeat_1{0, 872} [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]*))
+root_repeat_1 ::= (([0-9]*)) (=([0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]* [0-9]*))
 """
     grammar = xgr.Grammar.from_ebnf(before)
     grammar = GrammarFunctor.grammar_optimizer(grammar)
     assert expected_grammar == str(grammar)
 
-    before = "root ::= ([0-9]){100, 1000}"
-    expected_grammar = r"""root ::= ((root_repeat_1{85, 985} [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9]))
-root_repeat_1 ::= (([0-9])) (=([0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9]))
+    before = "root ::= ([0-9]){200, 1000}"
+    expected_grammar = r"""root ::= ((root_repeat_1{72, 872} [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9]))
+root_repeat_1 ::= (([0-9])) (=([0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9] [0-9]))
 """
     grammar = xgr.Grammar.from_ebnf(before)
     grammar = GrammarFunctor.grammar_optimizer(grammar)
diff --git a/tests/python/test_json_schema_converter.py b/tests/python/test_json_schema_converter.py
@@ -2178,13 +2178,18 @@ def test_generate_float_regex():
 def test_limited_whitespace_cnt():
     expected_grammar = r"""basic_escape ::= (([\"\\/bfnrt]) | ("u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9])) (=(basic_string_sub))
 basic_string_sub ::= (("\"") | ([^\0-\x1f\"\\\r\n] basic_string_sub) | ("\\" basic_escape basic_string_sub)) (=(basic_string_sub_1 [,}\]:]))
-basic_string ::= (("\"" basic_string_sub)) (=(root_4 "}"))
-root ::= (("{" root_1 "\"key\"" root_2 ":" root_3 basic_string root_4 "}"))
-basic_string_sub_1 ::= ("" | ([ \n\t]) | ([ \n\t] [ \n\t]))
-root_1 ::= ("" | ([ \n\t]) | ([ \n\t] [ \n\t])) (=("\"key\"" root_2 ":" root_3 basic_string root_4 "}"))
-root_2 ::= ("" | ([ \n\t]) | ([ \n\t] [ \n\t])) (=(":" root_3 basic_string root_4 "}"))
-root_3 ::= ("" | ([ \n\t]) | ([ \n\t] [ \n\t])) (=(basic_string root_4 "}"))
-root_4 ::= ("" | ([ \n\t]) | ([ \n\t] [ \n\t])) (=("}"))
+basic_string ::= (("\"" basic_string_sub)) (=(root_7 "}"))
+root ::= (("{" root_1 "\"key\"" root_3 ":" root_5 basic_string root_7 "}"))
+basic_string_sub_1 ::= ("" | ([ \n\t] basic_string_sub_2))
+basic_string_sub_2 ::= ("" | ([ \n\t]))
+root_1 ::= ("" | ([ \n\t] root_2)) (=("\"key\"" root_3 ":" root_5 basic_string root_7 "}"))
+root_2 ::= ("" | ([ \n\t]))
+root_3 ::= ("" | ([ \n\t] root_4)) (=(":" root_5 basic_string root_7 "}"))
+root_4 ::= ("" | ([ \n\t]))
+root_5 ::= ("" | ([ \n\t] root_6)) (=(basic_string root_7 "}"))
+root_6 ::= ("" | ([ \n\t]))
+root_7 ::= ("" | ([ \n\t] root_8)) (=("}"))
+root_8 ::= ("" | ([ \n\t]))
 """
     schema = {"type": "object", "properties": {"key": {"type": "string"}}, "required": ["key"]}
     grammar = xgr.Grammar.from_json_schema(schema, any_whitespace=True, max_whitespace_cnt=2)
@@ -2200,13 +2205,18 @@ def test_limited_whitespace_cnt():
 def test_limited_whitespace_compile():
     expected_grammar = r"""basic_escape ::= (([\"\\/bfnrt]) | ("u" [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9] [A-Fa-f0-9])) (=(basic_string_sub))
 basic_string_sub ::= (("\"") | ([^\0-\x1f\"\\\r\n] basic_string_sub) | ("\\" basic_escape basic_string_sub)) (=(basic_string_sub_1 [,}\]:]))
-basic_string ::= (("\"" basic_string_sub)) (=(root_4 "}"))
-root ::= (("{" root_1 "\"key\"" root_2 ":" root_3 basic_string root_4 "}"))
-basic_string_sub_1 ::= ("" | ([ \n\t]) | ([ \n\t] [ \n\t]))
-root_1 ::= ("" | ([ \n\t]) | ([ \n\t] [ \n\t])) (=("\"key\"" root_2 ":" root_3 basic_string root_4 "}"))
-root_2 ::= ("" | ([ \n\t]) | ([ \n\t] [ \n\t])) (=(":" root_3 basic_string root_4 "}"))
-root_3 ::= ("" | ([ \n\t]) | ([ \n\t] [ \n\t])) (=(basic_string root_4 "}"))
-root_4 ::= ("" | ([ \n\t]) | ([ \n\t] [ \n\t])) (=("}"))
+basic_string ::= (("\"" basic_string_sub)) (=(root_7 "}"))
+root ::= (("{" root_1 "\"key\"" root_3 ":" root_5 basic_string root_7 "}"))
+basic_string_sub_1 ::= ("" | ([ \n\t] basic_string_sub_2))
+basic_string_sub_2 ::= ("" | ([ \n\t]))
+root_1 ::= ("" | ([ \n\t] root_2)) (=("\"key\"" root_3 ":" root_5 basic_string root_7 "}"))
+root_2 ::= ("" | ([ \n\t]))
+root_3 ::= ("" | ([ \n\t] root_4)) (=(":" root_5 basic_string root_7 "}"))
+root_4 ::= ("" | ([ \n\t]))
+root_5 ::= ("" | ([ \n\t] root_6)) (=(basic_string root_7 "}"))
+root_6 ::= ("" | ([ \n\t]))
+root_7 ::= ("" | ([ \n\t] root_8)) (=("}"))
+root_8 ::= ("" | ([ \n\t]))
 """
     schema = {"type": "object", "properties": {"key": {"type": "string"}}, "required": ["key"]}
     tokenizer_info = xgr.TokenizerInfo([])