From f2840cbb1b1a77f4a713df3772cecb4f14fb5391 Mon Sep 17 00:00:00 2001 From: Junyi Chen Date: Fri, 20 Jun 2025 20:42:02 +0800 Subject: [PATCH 1/5] add cache --- .../chunked_prefill/impl_for_xgrammar_mode.py | 22 +++++++++++++++---- 1 file changed, 18 insertions(+), 4 deletions(-) diff --git a/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py b/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py index 8cdd840e6..5f2848b84 100644 --- a/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py +++ b/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py @@ -1,3 +1,5 @@ +import copy +import functools import torch from typing import List, Tuple @@ -34,6 +36,18 @@ def init_custom(self): eos_token_ids = [] eos_token_ids.append(self.tokenizer.eos_token_id) eos_token_ids.extend(self.args.eos_id) + + @functools.lru_cache(maxsize=200) + def dispatch_grammar(type: str, grammar: str): + logger.info(f"grammar cache miss for {type}: '{grammar}'") + if type == "grammar": + return self.xgrammar_compiler.compile_grammar(grammar) + elif type == "schema": + return self.xgrammar_compiler.compile_json_schema(grammar) + else: + raise ValueError(f"Unknown xgrammar type: {type}") + + self.dispatch_grammar = dispatch_grammar return @calculate_time(show=False, min_cost_ms=300) @@ -149,10 +163,10 @@ def _init_req_xgrammer_matcher_infos(self, run_reqs: List[InferReq]): sample_params = run_obj.sampling_param if sample_params.guided_grammar is not None: if not hasattr(sample_params, "xgrammar_matcher"): - xgrammar_compiled_grammar = self.xgrammar_compiler.compile_grammar(sample_params.guided_grammar) - sample_params.xgrammar_matcher = xgr.GrammarMatcher(xgrammar_compiled_grammar) + ctx = self.dispatch_grammar("grammar", sample_params.guided_grammar) + sample_params.xgrammar_matcher = xgr.GrammarMatcher(ctx) elif sample_params.guided_json is not None: if not hasattr(sample_params, "xgrammar_matcher"): - xgrammar_compiled_grammar = self.xgrammar_compiler.compile_json_schema(sample_params.guided_json) - sample_params.xgrammar_matcher = xgr.GrammarMatcher(xgrammar_compiled_grammar) + ctx = self.dispatch_grammar("schema", sample_params.guided_json) + sample_params.xgrammar_matcher = xgr.GrammarMatcher(ctx) return From 4ae114c8d70dc97af2e63d63ee1d287ec931dee0 Mon Sep 17 00:00:00 2001 From: FlyingFlame Date: Fri, 20 Jun 2025 20:44:09 +0800 Subject: [PATCH 2/5] Update lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../chunked_prefill/impl_for_xgrammar_mode.py | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) diff --git a/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py b/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py index 5f2848b84..4077a4df0 100644 --- a/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py +++ b/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py @@ -40,12 +40,17 @@ def init_custom(self): @functools.lru_cache(maxsize=200) def dispatch_grammar(type: str, grammar: str): logger.info(f"grammar cache miss for {type}: '{grammar}'") - if type == "grammar": - return self.xgrammar_compiler.compile_grammar(grammar) - elif type == "schema": - return self.xgrammar_compiler.compile_json_schema(grammar) - else: - raise ValueError(f"Unknown xgrammar type: {type}") + try: + if type == "grammar": + return self.xgrammar_compiler.compile_grammar(grammar) + elif type == "schema": + return self.xgrammar_compiler.compile_json_schema(grammar) + else: + raise ValueError(f"Unknown xgrammar type: {type}") + except Exception as e: + logger.error(f"Failed to compile {type}: {e}") + raise + self.dispatch_grammar = dispatch_grammar return From d6db895b0a6a2eaf9f08f3a21901243e5b5ca149 Mon Sep 17 00:00:00 2001 From: FlyingFlame Date: Fri, 20 Jun 2025 20:44:36 +0800 Subject: [PATCH 3/5] Update lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py Co-authored-by: gemini-code-assist[bot] <176961590+gemini-code-assist[bot]@users.noreply.github.com> --- .../chunked_prefill/impl_for_xgrammar_mode.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py b/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py index 4077a4df0..6e13cc30f 100644 --- a/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py +++ b/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py @@ -172,6 +172,11 @@ def _init_req_xgrammer_matcher_infos(self, run_reqs: List[InferReq]): sample_params.xgrammar_matcher = xgr.GrammarMatcher(ctx) elif sample_params.guided_json is not None: if not hasattr(sample_params, "xgrammar_matcher"): - ctx = self.dispatch_grammar("schema", sample_params.guided_json) - sample_params.xgrammar_matcher = xgr.GrammarMatcher(ctx) + try: + ctx = self.dispatch_grammar("schema", sample_params.guided_json) + sample_params.xgrammar_matcher = xgr.GrammarMatcher(ctx) + except Exception as e: + logger.error(f"Failed to compile schema: {e}") + # Handle the error appropriately, e.g., by setting a default matcher or skipping the schema + continue return From 327d50b95d4fc27ed393101613c440171f32efe5 Mon Sep 17 00:00:00 2001 From: Junyi Chen Date: Fri, 20 Jun 2025 20:47:51 +0800 Subject: [PATCH 4/5] fix --- .../chunked_prefill/impl_for_xgrammar_mode.py | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) diff --git a/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py b/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py index 6e13cc30f..d281874be 100644 --- a/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py +++ b/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py @@ -51,7 +51,6 @@ def dispatch_grammar(type: str, grammar: str): logger.error(f"Failed to compile {type}: {e}") raise - self.dispatch_grammar = dispatch_grammar return @@ -172,11 +171,6 @@ def _init_req_xgrammer_matcher_infos(self, run_reqs: List[InferReq]): sample_params.xgrammar_matcher = xgr.GrammarMatcher(ctx) elif sample_params.guided_json is not None: if not hasattr(sample_params, "xgrammar_matcher"): - try: - ctx = self.dispatch_grammar("schema", sample_params.guided_json) - sample_params.xgrammar_matcher = xgr.GrammarMatcher(ctx) - except Exception as e: - logger.error(f"Failed to compile schema: {e}") - # Handle the error appropriately, e.g., by setting a default matcher or skipping the schema - continue + ctx = self.dispatch_grammar("schema", sample_params.guided_json) + sample_params.xgrammar_matcher = xgr.GrammarMatcher(ctx) return From 499dc3ac481ed5a83f72f388a13b92043ca5d9f3 Mon Sep 17 00:00:00 2001 From: wangzaijun Date: Sat, 21 Jun 2025 08:34:40 +0800 Subject: [PATCH 5/5] fix --- .../chunked_prefill/impl_for_xgrammar_mode.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py b/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py index d281874be..72172431f 100644 --- a/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py +++ b/lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py @@ -38,7 +38,7 @@ def init_custom(self): eos_token_ids.extend(self.args.eos_id) @functools.lru_cache(maxsize=200) - def dispatch_grammar(type: str, grammar: str): + def get_cached_grammar(type: str, grammar: str): logger.info(f"grammar cache miss for {type}: '{grammar}'") try: if type == "grammar": @@ -51,7 +51,7 @@ def dispatch_grammar(type: str, grammar: str): logger.error(f"Failed to compile {type}: {e}") raise - self.dispatch_grammar = dispatch_grammar + self.get_cached_grammar = get_cached_grammar return @calculate_time(show=False, min_cost_ms=300) @@ -167,10 +167,10 @@ def _init_req_xgrammer_matcher_infos(self, run_reqs: List[InferReq]): sample_params = run_obj.sampling_param if sample_params.guided_grammar is not None: if not hasattr(sample_params, "xgrammar_matcher"): - ctx = self.dispatch_grammar("grammar", sample_params.guided_grammar) + ctx = self.get_cached_grammar("grammar", sample_params.guided_grammar) sample_params.xgrammar_matcher = xgr.GrammarMatcher(ctx) elif sample_params.guided_json is not None: if not hasattr(sample_params, "xgrammar_matcher"): - ctx = self.dispatch_grammar("schema", sample_params.guided_json) + ctx = self.get_cached_grammar("schema", sample_params.guided_json) sample_params.xgrammar_matcher = xgr.GrammarMatcher(ctx) return