1919#include " fsm.h"
2020#include " grammar_functor.h"
2121#include " grammar_impl.h"
22+ #include " grammar_matcher_for_cache.h"
2223#include " support/logging.h"
2324#include " support/thread_pool.h"
2425#include " support/thread_safe_cache.h"
2526#include " support/utils.h"
2627
2728namespace xgrammar {
2829
29- /* ************* AdaptiveTokenMaskCache Generator **************/
30-
31- /* ! \brief The concrete implementation of GrammarMatcherNode. */
32- class GrammarMatcherForTokenMaskCache : public EarleyParser {
33- public:
34- GrammarMatcherForTokenMaskCache (
35- const Grammar& grammar, const ParserState& init_state, const bool & need_expand = true
36- )
37- : EarleyParser(grammar, init_state),
38- init_rule_id (init_state.rule_id),
39- initial_state(init_state) {}
40- /* !
41- * \brief Get the adaptive token mask for the given ParserState.
42- * \param is_root_rule Whether to consider the parent rule. If false, there will be
43- * no uncertain tokens. Useful for the root rule.
44- */
45- AdaptiveTokenMask GetAdaptiveTokenMask (
46- size_t vocab_size,
47- const std::vector<std::pair<int32_t , std::string>>& sorted_decoded_vocab,
48- const std::vector<int32_t >& subtree_nodes_range,
49- bool is_root_rule
50- );
51-
52- /* !
53- * \brief Get the token mask for the given ParserState.
54- * \param sorted_decoded_vocab The sorted decoded vocabulary.
55- * \param first_char_mask The first character mask.
56- * \param is_root_rule Whether to consider the parent rule. If false, there will be
57- * no uncertain tokens. Useful for the root rule.
58- * \returns True if the rejected indices are filled as usual, False otherwise.
59- * It's used to determine which construction function will be used.
60- */
61- bool GetTokenMaskWithFirstCharacterCheck (
62- const std::vector<std::pair<int32_t , std::string>>& sorted_decoded_vocab,
63- const std::bitset<256 >& first_char_mask,
64- const std::vector<int >& subtree_nodes_range,
65- bool is_root_rule
66- );
67-
68- private:
69- /* ! \brief Check if a token can pass the lookahead assertion. */
70- std::pair</* acceptable*/ bool , /* can reach end*/ bool > IsTokenPassLookaheadAssertion (
71- const std::string& token, const std::vector<bool >& can_reach_end_stack
72- );
73-
74- /* !
75- * \brief Check if speculative calculation will be applied.
76- * \return first: whether speculative calculation is applicable.
77- * \return second: part of the first character mask,
78- * which can be used in speculative calculation.
79- */
80- std::pair<bool , std::bitset<256 >> GetSpeculativeCalculation (
81- const std::vector<std::pair<int32_t , std::string>>& sorted_decoded_vocab
82- );
83-
84- // The id of the initial rule.
85- int32_t init_rule_id;
86-
87- // The initial state of the parser.
88- ParserState initial_state;
89-
90- // Temporary data for GetAdaptiveTokenMask.
91- std::vector<int32_t > tmp_accepted_indices_;
92- std::vector<int32_t > tmp_rejected_indices_;
93- std::vector<int32_t > tmp_uncertain_indices_;
94- std::vector<bool > tmp_can_reach_end_stack_;
95- std::vector<bool > tmp_can_reach_end_prefix_or_stack_;
96- };
30+ /* ************* Use GrammarMatcher to generate the AdaptiveTokenMaskCache **************/
9731
9832std::pair<bool , bool > GrammarMatcherForTokenMaskCache::IsTokenPassLookaheadAssertion (
9933 const std::string& token, const std::vector<bool >& can_reach_end_stack
@@ -543,8 +477,8 @@ AdaptiveTokenMask GrammarMatcherForTokenMaskCache::GetAdaptiveTokenMask(
543477 */
544478class GrammarCompilerNoCache {
545479 public:
546- GrammarCompilerNoCache (const TokenizerInfo& tokenizer_info, int max_threads)
547- : tokenizer_info_(tokenizer_info), max_threads_(max_threads) {}
480+ GrammarCompilerNoCache (const TokenizerInfo& tokenizer_info, int max_threads, bool is_jit )
481+ : tokenizer_info_(tokenizer_info), max_threads_(max_threads), is_jit_(is_jit) {}
548482
549483 CompiledGrammar CompileBuiltinJSONGrammar ();
550484
@@ -573,6 +507,8 @@ class GrammarCompilerNoCache {
573507 const TokenizerInfo tokenizer_info_;
574508 /* ! \brief The maximum number of threads to use. */
575509 const int max_threads_;
510+ /*! \brief Whether JIT mode is enabled; if set, MultiThreadCompileGrammar returns before computing the adaptive token mask cache. */
511+ const bool is_jit_;
576512};
577513
578514CompiledGrammar GrammarCompilerNoCache::MultiThreadCompileGrammar (Grammar grammar) {
@@ -588,6 +524,9 @@ CompiledGrammar GrammarCompilerNoCache::MultiThreadCompileGrammar(Grammar gramma
588524 if (tokenizer_info_.GetVocabSize () == 0 ) {
589525 return CompiledGrammar (compiled_grammar_impl);
590526 }
527+ if (is_jit_) {
528+ return CompiledGrammar (compiled_grammar_impl);
529+ }
591530 // Step 3. Compute the adaptive token mask cache
592531 // The token mask cache is computed for these positions in the grammar:
593532 // 1. All character class or character class star (with last_utf8_bytes=0, 1, 2, 3)
@@ -827,9 +766,10 @@ class GrammarCompiler::Impl {
827766 const TokenizerInfo& tokenizer_info,
828767 int max_threads,
829768 bool cache_enabled,
830- int64_t max_memory_bytes
769+ int64_t max_memory_bytes,
770+ bool is_jit
831771 )
832- : no_cache_compiler_(tokenizer_info, max_threads),
772+ : no_cache_compiler_(tokenizer_info, max_threads, is_jit ),
833773 cache_enabled_ (cache_enabled),
834774 compile_cache_(static_cast <std::size_t >(max_memory_bytes), Computer(*this )) {
835775 if (max_memory_bytes < -1 ) {
@@ -997,9 +937,16 @@ GrammarCompiler::GrammarCompiler(
997937 const TokenizerInfo& tokenizer_info,
998938 int max_threads,
999939 bool cache_enabled,
1000- int64_t max_memory_bytes
940+ int64_t max_memory_bytes,
941+ bool is_jit
1001942)
1002- : pimpl_(std::make_shared<Impl>(tokenizer_info, max_threads, cache_enabled, max_memory_bytes)) {
943+ : pimpl_(std::make_shared<Impl>(
944+ tokenizer_info, max_threads, cache_enabled, max_memory_bytes, is_jit
945+ )) {
946+ if (max_memory_bytes < -1 ) {
947+ XGRAMMAR_LOG (FATAL) << " Invalid max_memory_bytes: " << max_memory_bytes << " . "
948+ << " It should be -1 (unlimited) or a non-negative integer." ;
949+ }
1003950}
1004951
1005952CompiledGrammar GrammarCompiler::CompileJSONSchema (
0 commit comments