
Commit 7941b47 ("fix")
Parent: 79b4d00

File tree: 4 files changed, +21 / -20 lines

lightllm/server/api_cli.py
Lines changed: 3 additions & 3 deletions

@@ -184,9 +184,9 @@ def make_argument_parser() -> argparse.ArgumentParser:
         disabling it allows the router_max_wait_tokens parameter to work more effectively.""",
     )
 
-    parser.add_argument("--use_dynamic_prompt_cache",
-                        action="store_true",
-                        help="This argument is deprecated and no longer in use.")
+    parser.add_argument(
+        "--use_dynamic_prompt_cache", action="store_true", help="This argument is deprecated and no longer in use."
+    )
     parser.add_argument("--disable_dynamic_prompt_cache", action="store_true", help="disable dynamic prompt cache")
 
     parser.add_argument("--chunked_prefill_size", type=int, default=8192, help="chunked prefill size")

lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_outlines_constraint_mode.py
Lines changed: 8 additions & 8 deletions

@@ -43,7 +43,7 @@ def init_custom(self):
         self.tokenizer.eos_token_ids = eos_token_ids
         logger.info(f"eos_ids {self.tokenizer.eos_token_ids}")
         return
-
+
     def decode(self):
         uninit_reqs, aborted_reqs, ok_finished_reqs, prefill_reqs, decode_reqs = self._get_classed_reqs(
             g_infer_context.infer_req_ids
@@ -59,7 +59,7 @@ def decode(self):
             self._overlap_req_init_and_filter(
                 uninit_reqs=uninit_reqs, ok_finished_reqs=ok_finished_reqs, clear_list=True
             )
-
+
             self._init_guide_infos(run_reqs)
             all_has_no_constraint = all([not e.sampling_param.has_constraint_setting() for e in run_reqs])
             if not all_has_no_constraint:
@@ -105,9 +105,9 @@ def decode(self):
             next_token_logprobs = torch.log(next_token_probs).detach().cpu().numpy()
             self._post_handle(
                 run_reqs,
-                next_token_ids,
-                next_token_logprobs,
-                is_chuncked_mode=True,
+                next_token_ids,
+                next_token_logprobs,
+                is_chuncked_mode=True,
                 do_filter_finished_reqs=False,
                 extra_post_req_handle_func=self._update_state_fsm,
             )
@@ -143,10 +143,10 @@ def _mask_req_out_token(self, i, run_obj: InferReq, mask):
             mask[i, :] = False
         else:
             # no constraint
-            mask[i, :] = False
+            mask[i, :] = False
         return
-
-    def _init_guide_infos(self, run_reqs:List[InferReq]):
+
+    def _init_guide_infos(self, run_reqs: List[InferReq]):
         from outlines.fsm.guide import RegexGuide
 
         for i, run_obj in enumerate(run_reqs):
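For context on the mask[i, :] = False lines above: the backend builds a boolean mask over the vocabulary per request and clears the whole row for a request with no constraint, so none of its tokens are suppressed. A minimal sketch of that masking pattern, with made-up shapes and token ids rather than lightllm's actual tensors:

import torch

# Hypothetical batch of 2 requests over a tiny vocabulary of 8 tokens.
batch, vocab = 2, 8
logits = torch.randn(batch, vocab)

# mask[i, t] == True means token t is NOT allowed for request i.
mask = torch.ones((batch, vocab), dtype=torch.bool)

allowed_for_req0 = [1, 4, 7]        # e.g. the token ids a RegexGuide permits next
mask[0, allowed_for_req0] = False   # constrained request: only these survive
mask[1, :] = False                  # "no constraint" branch: allow everything

logits[mask] = float("-inf")        # suppressed tokens can no longer be sampled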

lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_token_healing.py
Lines changed: 4 additions & 3 deletions

@@ -12,6 +12,7 @@
 
 logger = init_logger(__name__)
 
+
 class TokenHealingBackend(ChunkedPrefillBackend):
     def __init__(self) -> None:
         super().__init__()
@@ -38,7 +39,7 @@ def init_custom(self):
         )
         self.token_indexes = torch.tensor([e[1] for e in self.sorted_tokens], dtype=torch.int64, device="cuda")
         return
-
+
     def decode(self):
         uninit_reqs, aborted_reqs, ok_finished_reqs, prefill_reqs, decode_reqs = self._get_classed_reqs(
             g_infer_context.infer_req_ids
@@ -56,7 +57,7 @@ def decode(self):
             )
 
             self._init_prefix_infos(run_reqs=run_reqs)
-
+
             all_no_prefix = all([len(e.prefix_str) == 0 for e in run_reqs])
             if not all_no_prefix:
                 mask = torch.ones_like(logits, dtype=torch.bool)
@@ -182,7 +183,7 @@ def _topk_recover(self, run_reqs: list[InferReq]):
         for req_obj in run_reqs:
             req_obj.sampling_param.shm_param.top_k = req_obj.origin_topk
         return
-
+
     def _init_prefix_infos(self, run_reqs: List[InferReq]):
         for i, run_obj in enumerate(run_reqs):
             if not hasattr(run_obj, "prefix_str"):
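In essence, the prefix_str set up by _init_prefix_infos is the text of the prompt tail that token healing re-opens; the next sampled token is then restricted to vocabulary entries compatible with that prefix, using the same masking pattern seen in decode() above. An illustrative sketch of the idea with a toy vocabulary (hypothetical names, not the backend's actual implementation):

import torch

# Toy vocabulary: token id -> decoded text.
vocab = {0: "hel", 1: "hello", 2: "help", 3: "world", 4: "he"}
prefix_str = "hel"  # text of the trimmed prompt tail

# Keep only tokens whose text extends the dangling prefix.
allowed = [tid for tid, text in vocab.items() if text.startswith(prefix_str)]

logits = torch.randn(1, len(vocab))
mask = torch.ones_like(logits, dtype=torch.bool)
mask[0, allowed] = False                          # allow only prefix-compatible tokens
logits[mask] = float("-inf")
next_token = torch.argmax(logits, dim=-1).item()  # guaranteed to start with "hel"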

lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_xgrammar_mode.py
Lines changed: 6 additions & 6 deletions

@@ -35,10 +35,10 @@ def init_custom(self):
         eos_token_ids.append(self.tokenizer.eos_token_id)
         eos_token_ids.extend(self.args.eos_id)
         return
-
+
     @calculate_time(show=False, min_cost_ms=300)
     def decode(self):
-
+
         uninit_reqs, aborted_reqs, ok_finished_reqs, prefill_reqs, decode_reqs = self._get_classed_reqs(
             g_infer_context.infer_req_ids
         )
@@ -86,7 +86,7 @@ def decode(self):
             self._overlap_req_init_and_filter(
                 uninit_reqs=uninit_reqs, ok_finished_reqs=ok_finished_reqs, clear_list=True
            )
-
+
             self._init_req_xgrammer_matcher_infos(run_reqs=run_reqs)
             for i, run_obj in enumerate(run_reqs):
                 self._mask_req_out_token(i, run_obj, logits[i])
@@ -125,15 +125,15 @@ def _update_xgrammer_fsm(self, req_obj: InferReq, next_token_id, next_token_logp
 
     def _mask_req_out_token(self, i, run_obj: InferReq, logits):
         import xgrammar as xgr
-
+
         if run_obj.get_chuncked_input_token_len() == run_obj.get_cur_total_len():
             sample_params = run_obj.sampling_param
             if sample_params.guided_grammar is not None or sample_params.guided_json is not None:
                 sample_params.xgrammar_matcher.fill_next_token_bitmask(self.xgrammar_token_bitmask)
                 xgr.apply_token_bitmask_inplace(logits, self.xgrammar_token_bitmask.to(logits.device))
         return
-
-    def _init_req_xgrammer_matcher_infos(self, run_reqs:List[InferReq]):
+
+    def _init_req_xgrammer_matcher_infos(self, run_reqs: List[InferReq]):
         import xgrammar as xgr
 
         for i, run_obj in enumerate(run_reqs):
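_mask_req_out_token above fills a token bitmask from the request's xgrammar matcher and applies it to the logits in place. A standalone sketch of that fill/apply pattern; the setup here (tokenizer choice, grammar compilation) is illustrative only, and the exact calls depend on the xgrammar version actually installed:

import torch
import xgrammar as xgr
from transformers import AutoTokenizer

# Illustrative setup: compile a JSON grammar for a single request.
tokenizer = AutoTokenizer.from_pretrained("gpt2")  # any HF tokenizer, just for the sketch
tokenizer_info = xgr.TokenizerInfo.from_huggingface(tokenizer)
compiler = xgr.GrammarCompiler(tokenizer_info)
compiled = compiler.compile_builtin_json_grammar()  # stands in for guided_json / guided_grammar
matcher = xgr.GrammarMatcher(compiled)

# One bitmask row per request, reused across decode steps.
bitmask = xgr.allocate_token_bitmask(1, tokenizer_info.vocab_size)

logits = torch.randn(1, tokenizer_info.vocab_size)
matcher.fill_next_token_bitmask(bitmask)           # mark the tokens the grammar allows next
xgr.apply_token_bitmask_inplace(logits, bitmask)   # disallowed logits are pushed to -inf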
