mlc-ai
diff --git a/cpp/grammar/grammar.h b/cpp/grammar/grammar.h
Lines changed: 5 additions & 5 deletions
diff --git a/cpp/grammar/grammar_builder.h b/cpp/grammar/grammar_builder.h
Lines changed: 4 additions & 4 deletions
diff --git a/cpp/grammar/grammar_functor.h b/cpp/grammar/grammar_functor.h
Lines changed: 3 additions & 3 deletions
diff --git a/cpp/grammar/grammar_parser.h b/cpp/grammar/grammar_parser.h
Lines changed: 3 additions & 3 deletions
diff --git a/cpp/grammar/grammar_serializer.cc b/cpp/grammar/grammar_serializer.cc
Lines changed: 0 additions & 1 deletion
diff --git a/cpp/grammar/grammar_serializer.h b/cpp/grammar/grammar_serializer.h
Lines changed: 3 additions & 3 deletions
diff --git a/cpp/grammar/grammar_state_matcher.cc b/cpp/grammar/grammar_state_matcher.cc
Lines changed: 1 addition & 48 deletions
diff --git a/cpp/grammar/grammar_state_matcher.h b/cpp/grammar/grammar_state_matcher.h
Lines changed: 3 additions & 3 deletions
diff --git a/cpp/grammar/grammar_state_matcher_base.h b/cpp/grammar/grammar_state_matcher_base.h
Lines changed: 12 additions & 11 deletions
diff --git a/cpp/grammar/grammar_state_matcher_preproc.h b/cpp/grammar/grammar_state_matcher_preproc.h
Lines changed: 5 additions & 3 deletions
@@ -4,8 +4,8 @@
  * \brief The header for the support of grammar-guided generation.
  */
 
-#ifndef MLC_LLM_SERVE_GRAMMAR_GRAMMAR_H_
-#define MLC_LLM_SERVE_GRAMMAR_GRAMMAR_H_
+#ifndef MLC_LLM_GRAMMAR_GRAMMAR_H_
+#define MLC_LLM_GRAMMAR_GRAMMAR_H_
 
 #include <tvm/runtime/object.h>
 #include <tvm/runtime/registry.h>
@@ -191,10 +191,10 @@ class BNFGrammar : public ObjectRef {
    * format of the schema of a JSON file. We will parse the schema and generate a BNF grammar.
    * \param schema The schema string.
    * \param indent The number of spaces for indentation. If set to std::nullopt, the output will be
-   * in one line. Default: std::nullopt.
+   * in one line. Default: 2.
    * \param separators Two separators used in the schema: comma and colon. Examples: {",", ":"},
    * {", ", ": "}. If std::nullopt, the default separators will be used: {",", ": "} when the
-   * indent is not -1, and {", ", ": "} otherwise. This follows the convention in python
+   * indent is not nullopt, and {", ", ": "} otherwise. This follows the convention in python
    * json.dumps(). Default: std::nullopt.
    * \param strict_mode Whether to use strict mode. In strict mode, the generated grammar will not
    * allow properties and items that are not specified in the schema. This is equivalent to
@@ -223,4 +223,4 @@ class BNFGrammar : public ObjectRef {
 }  // namespace llm
 }  // namespace mlc
 
-#endif  // MLC_LLM_SERVE_GRAMMAR_GRAMMAR_H_
+#endif  // MLC_LLM_GRAMMAR_GRAMMAR_H_
@@ -4,8 +4,8 @@
  * \brief The header for building the BNF AST.
  */
 
-#ifndef MLC_LLM_SERVE_GRAMMAR_GRAMMAR_BUILDER_H_
-#define MLC_LLM_SERVE_GRAMMAR_GRAMMAR_BUILDER_H_
+#ifndef MLC_LLM_GRAMMAR_GRAMMAR_BUILDER_H_
+#define MLC_LLM_GRAMMAR_GRAMMAR_BUILDER_H_
 #include <tvm/runtime/object.h>
 
 #include <cstdint>
@@ -38,7 +38,7 @@ class BNFGrammarBuilder {
    */
   BNFGrammar Get(const std::string& main_rule = "main") {
     int32_t main_rule_id = GetRuleId(main_rule);
-    CHECK(main_rule_id != -1) << "The in rule with name \"" << main_rule << "\" is not found.";
+    CHECK(main_rule_id != -1) << "The main rule with name \"" << main_rule << "\" is not found.";
     grammar_->main_rule_id_ = main_rule_id;
 
     return BNFGrammar(grammar_);
@@ -251,4 +251,4 @@ class BNFGrammarBuilder {
 }  // namespace llm
 }  // namespace mlc
 
-#endif  // MLC_LLM_SERVE_GRAMMAR_GRAMMAR_BUILDER_H_
+#endif  // MLC_LLM_GRAMMAR_GRAMMAR_BUILDER_H_
@@ -4,8 +4,8 @@
  * \brief The header for the simplification of the BNF AST.
  */
 
-#ifndef MLC_LLM_SERVE_GRAMMAR_GRAMMAR_FUNCTOR_H_
-#define MLC_LLM_SERVE_GRAMMAR_GRAMMAR_FUNCTOR_H_
+#ifndef MLC_LLM_GRAMMAR_GRAMMAR_FUNCTOR_H_
+#define MLC_LLM_GRAMMAR_GRAMMAR_FUNCTOR_H_
 
 #include <queue>
 #include <string>
@@ -216,4 +216,4 @@ class BNFGrammarNormalizer : public BNFGrammarMutator {
 }  // namespace llm
 }  // namespace mlc
 
-#endif  // MLC_LLM_SERVE_GRAMMAR_GRAMMAR_FUNCTOR_H_
+#endif  // MLC_LLM_GRAMMAR_GRAMMAR_FUNCTOR_H_
@@ -4,8 +4,8 @@
  * \brief The header for the parser of BNF/EBNF grammar into BNF AST.
  */
 
-#ifndef MLC_LLM_SERVE_GRAMMAR_GRAMMAR_PARSER_H_
-#define MLC_LLM_SERVE_GRAMMAR_GRAMMAR_PARSER_H_
+#ifndef MLC_LLM_GRAMMAR_GRAMMAR_PARSER_H_
+#define MLC_LLM_GRAMMAR_GRAMMAR_PARSER_H_
 
 #include <tvm/runtime/container/string.h>
 #include <tvm/runtime/logging.h>
@@ -65,4 +65,4 @@ class BNFJSONParser {
 }  // namespace llm
 }  // namespace mlc
 
-#endif  // MLC_LLM_SERVE_GRAMMAR_GRAMMAR_PARSER_H_
+#endif  // MLC_LLM_GRAMMAR_GRAMMAR_PARSER_H_
@@ -6,7 +6,6 @@
 #include "grammar_serializer.h"
 
 #include <picojson.h>
-#include <tvm/runtime/memory.h>
 #include <tvm/runtime/registry.h>
 
 #include "../support/encoding.h"
 
@@ -4,8 +4,8 @@
  * \brief The header for printing the AST of a BNF grammar.
  */
 
-#ifndef MLC_LLM_SERVE_GRAMMAR_GRAMMAR_SERIALIZER_H_
-#define MLC_LLM_SERVE_GRAMMAR_GRAMMAR_SERIALIZER_H_
+#ifndef MLC_LLM_GRAMMAR_GRAMMAR_SERIALIZER_H_
+#define MLC_LLM_GRAMMAR_GRAMMAR_SERIALIZER_H_
 
 #include <string>
 
@@ -114,4 +114,4 @@ class BNFGrammarJSONSerializer : public BNFGrammarSerializer {
 }  // namespace llm
 }  // namespace mlc
 
-#endif  // MLC_LLM_SERVE_GRAMMAR_GRAMMAR_SERIALIZER_H_
+#endif  // MLC_LLM_GRAMMAR_GRAMMAR_SERIALIZER_H_
@@ -246,8 +246,6 @@ void GrammarStateMatcherNodeImpl::FindNextTokenBitmask(DLTensor* next_token_bitm
   // {-1} means the universal set, i.e. all tokens initially
   tmp_rejected_indices_.assign({-1});
 
-  // std::chrono::microseconds time_unc(0);
-  // std::chrono::microseconds time_idx(0);
   int check_cnt = 0;
 
   for (auto top : latest_stack_tops) {
@@ -258,8 +256,6 @@ void GrammarStateMatcherNodeImpl::FindNextTokenBitmask(DLTensor* next_token_bitm
 
     const auto& catagorized_tokens = catagorized_tokens_for_grammar.at(cur_rule_position);
 
-    // auto start = std::chrono::high_resolution_clock::now();
-
     // For each stack, we will check every uncertain token and put them into the accepted or
     // rejected list.
 
@@ -277,35 +273,6 @@ void GrammarStateMatcherNodeImpl::FindNextTokenBitmask(DLTensor* next_token_bitm
     const std::string* prev_token = nullptr;
     int prev_matched_size = 0;
 
-    // std::cout << tree_.PrintNode(top) << std::endl;
-
-    // std::cout << "Accepted count: " << catagorized_tokens.accepted_indices.size()
-    //           << ", rejected count: " << catagorized_tokens.rejected_indices.size()
-    //           << ", uncertain count: " << catagorized_tokens.uncertain_indices.size()
-    //           << ", save type: " << static_cast<int>(catagorized_tokens.save_type) << std::endl;
-
-    // if (catagorized_tokens.accepted_indices.size() < 200) {
-    //   std::cout << "Accpeted: ";
-    //   for (int i = 0; i < catagorized_tokens.accepted_indices.size(); ++i) {
-    //     std::cout << "<"
-    //               << PrintAsEscaped(
-    //                      sorted_token_table[catagorized_tokens.accepted_indices[i]].second)
-    //               << "> ";
-    //   }
-    //   std::cout << "\n";
-    // }
-
-    // if (catagorized_tokens.uncertain_indices.size() > 100) {
-    // std::cout << "Uncertain: ";
-    // for (int i = 0; i < catagorized_tokens.uncertain_indices.size(); ++i) {
-    //   std::cout << "<"
-    //             << PrintAsEscaped(
-    //                    sorted_token_table[catagorized_tokens.uncertain_indices[i]].second)
-    //             << "> ";
-    // }
-    // std::cout << "\n";
-    // }
-
     for (auto cur_token_idx : catagorized_tokens.uncertain_indices) {
       const auto& cur_token = sorted_token_table[cur_token_idx].second;
       bool accepted = true;
@@ -354,13 +321,7 @@ void GrammarStateMatcherNodeImpl::FindNextTokenBitmask(DLTensor* next_token_bitm
 
     RollbackChars(prev_matched_size + 1);
 
-    // auto end = std::chrono::high_resolution_clock::now();
-
-    // time_unc += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
-
-    // start = std::chrono::high_resolution_clock::now();
-
-    // Step 3. Update the accepted_indices and rejected_indices
+    // Step 3. Update the accepted_indices or rejected_indices
     if (catagorized_tokens.save_type == SaveType::kAcceptedBitset) {
       tmp_accepted_bitset_ |= catagorized_tokens.accepted_bitset;
     } else if (catagorized_tokens.save_type == SaveType::kAccepted) {
@@ -374,19 +335,11 @@ void GrammarStateMatcherNodeImpl::FindNextTokenBitmask(DLTensor* next_token_bitm
       IntsetUnion(&tmp_rejected_indices_delta_, catagorized_tokens.rejected_indices);
       IntsetIntersection(&tmp_rejected_indices_, tmp_rejected_indices_delta_);
     }
-    // end = std::chrono::high_resolution_clock::now();
-    // time_idx += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
   }
 
   // Finally update the rejected_ids bitset
-  // auto start = std::chrono::high_resolution_clock::now();
   bool can_reach_end = CanReachEnd();
   SetTokenBitmask(next_token_bitmask, tmp_accepted_bitset_, tmp_rejected_indices_, can_reach_end);
-  // auto end = std::chrono::high_resolution_clock::now();
-  // time_idx += std::chrono::duration_cast<std::chrono::microseconds>(end - start);
-  // std::cout << "Time for uncertain: " << time_unc.count()
-  //           << "us, time for index: " << time_idx.count() << "us" << std::endl;
-  // std::cout << "Check cnt " << check_cnt << std::endl;
 }
 
 void GrammarStateMatcherNodeImpl::Rollback(int num_tokens) {
 
@@ -5,8 +5,8 @@
  * logic of the grammar-guided generation.
  */
 
-#ifndef MLC_LLM_SERVE_GRAMMAR_GRAMMAR_STATE_MATCHER_H_
-#define MLC_LLM_SERVE_GRAMMAR_GRAMMAR_STATE_MATCHER_H_
+#ifndef MLC_LLM_GRAMMAR_GRAMMAR_STATE_MATCHER_H_
+#define MLC_LLM_GRAMMAR_GRAMMAR_STATE_MATCHER_H_
 
 #include <tvm/runtime/object.h>
 #include <tvm/runtime/registry.h>
@@ -172,4 +172,4 @@ class GrammarInitContextCache : public ObjectRef {
 }  // namespace llm
 }  // namespace mlc
 
-#endif  // MLC_LLM_SERVE_GRAMMAR_GRAMMAR_STATE_MATCHER_H_
+#endif  // MLC_LLM_GRAMMAR_GRAMMAR_STATE_MATCHER_H_
@@ -3,8 +3,8 @@
  * \file grammar/grammar_state_matcher_base.h
  * \brief The base class of GrammarStateMatcher. It implements a character-based matching automaton.
  */
-#ifndef MLC_LLM_SERVE_GRAMMAR_GRAMMAR_STATE_MATCHER_BASE_H_
-#define MLC_LLM_SERVE_GRAMMAR_GRAMMAR_STATE_MATCHER_BASE_H_
+#ifndef MLC_LLM_GRAMMAR_GRAMMAR_STATE_MATCHER_BASE_H_
+#define MLC_LLM_GRAMMAR_GRAMMAR_STATE_MATCHER_BASE_H_
 
 #include <vector>
 
@@ -109,7 +109,8 @@ class GrammarStateMatcherBase {
   // We store the stack tops in different steps in the history to support rollback.
   StackTopsHistory stack_tops_history_;
 
-  // Temporary data for AcceptChar.
+  // Temporary data for AcceptChar, PushInitialState, etc., to store new stacks.
+  // They are stored here to avoid repeated allocation.
   std::vector<int32_t> tmp_new_stack_tops_;
 };
 
@@ -267,21 +268,21 @@ inline void GrammarStateMatcherBase::PushInitialState(RulePosition init_rule_pos
     // Initialize the stack with the main rule.
     auto main_rule = grammar_->GetMainRule();
     auto main_rule_body = grammar_->GetRuleExpr(main_rule.body_expr_id);
-    std::vector<int32_t> stack_tops;
+    tmp_new_stack_tops_.clear();
     for (auto i : main_rule_body) {
       auto init_rule_position = RulePosition(0, i, 0, RulePosition::kNoParent);
       if (expand_init_rule_position) {
-        ExpandRulePosition(init_rule_position, &stack_tops, true);
+        ExpandRulePosition(init_rule_position, &tmp_new_stack_tops_, true);
       } else {
-        stack_tops.push_back(tree_.NewNode(init_rule_position));
+        tmp_new_stack_tops_.push_back(tree_.NewNode(init_rule_position));
       }
     }
-    stack_tops_history_.PushHistory(stack_tops);
+    stack_tops_history_.PushHistory(tmp_new_stack_tops_);
   } else {
     if (expand_init_rule_position) {
-      std::vector<int32_t> stack_tops;
-      ExpandRulePosition(init_rule_position, &stack_tops, true);
-      stack_tops_history_.PushHistory(stack_tops);
+      tmp_new_stack_tops_.clear();
+      ExpandRulePosition(init_rule_position, &tmp_new_stack_tops_, true);
+      stack_tops_history_.PushHistory(tmp_new_stack_tops_);
     } else {
       stack_tops_history_.PushHistory({tree_.NewNode(init_rule_position)});
     }
@@ -397,4 +398,4 @@ inline bool GrammarStateMatcherBase::ExpandRulePosition(RulePosition cur_rule_po
 }  // namespace llm
 }  // namespace mlc
 
-#endif  // MLC_LLM_SERVE_GRAMMAR_GRAMMAR_STATE_MATCHER_BASE_H_
+#endif  // MLC_LLM_GRAMMAR_GRAMMAR_STATE_MATCHER_BASE_H_
@@ -3,8 +3,8 @@
  * \file grammar/grammar_state_matcher_preproc.h
  * \brief The header for the preprocessing of the grammar state matcher.
  */
-#ifndef MLC_LLM_SERVE_GRAMMAR_GRAMMAR_STATE_MATCHER_PREPROC_H_
-#define MLC_LLM_SERVE_GRAMMAR_GRAMMAR_STATE_MATCHER_PREPROC_H_
+#ifndef MLC_LLM_GRAMMAR_GRAMMAR_STATE_MATCHER_PREPROC_H_
+#define MLC_LLM_GRAMMAR_GRAMMAR_STATE_MATCHER_PREPROC_H_
 
 #include <vector>
 
@@ -309,6 +309,8 @@ inline std::shared_ptr<GrammarStateInitContext> GrammarStateMatcher::CreateInitC
 
   for (int i = 0; i < token_table.size(); ++i) {
     const auto& token = token_table[i];
+    // TODO(yixin): Currently stop tokens are detected from the token string. We should allow the
+    // stop token set to be passed in explicitly.
     // LLaMA2: </s>
     // LLaMA3: <|end_of_text|>, <|eot_id|>
     // Phi-2: <|endoftext|>
@@ -432,4 +434,4 @@ GrammarInitContextCache::GrammarInitContextCache(const std::vector<std::string>&
 }  // namespace llm
 }  // namespace mlc
 
-#endif  // TVM_LLVM_COMPILE_ENGINE_CPP_SERVE_GRAMMAR_STATE_MATCHER_PREPROC_H_
+#endif  // MLC_LLM_GRAMMAR_GRAMMAR_STATE_MATCHER_PREPROC_H_