Skip to content

Commit e123d38

Browse files
committed
Merge branch 'deepep' of https://github.com/ModelTC/lightllm into deepep
2 parents a0bf0ff + 55d01f1 commit e123d38

File tree

1 file changed

+5
-1
lines changed

1 file changed

+5
-1
lines changed

lightllm/server/router/model_infer/mode_backend/chunked_prefill/impl_for_first_token_constraint_mode.py

Lines changed: 5 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -28,6 +28,7 @@ def init_custom(self):
2828
logger.info(f"first_allowed_tokens : {self.first_allowed_tokens}")
2929
# check token_id < vocab_size
3030
assert all(e < self.model.vocab_size for e in self.first_allowed_tokens)
31+
self.fill_value = torch.tensor(-1000000.0)
3132
return
3233

3334
def decode(self):
@@ -92,5 +93,8 @@ def _mask_first_gen_token_logits(self, run_reqs: List[InferReq], logits: torch.T
9293
mask[i, :] = True
9394
mask[i, self.first_allowed_tokens] = False
9495
torch.cuda.current_stream().wait_stream(g_infer_context.get_overlap_stream())
95-
logits[mask] = -1000000.0
96+
# Cannot use `logits[mask] = -1000000.0` here
97+
# It triggers a strange multi-stream asynchronicity issue, possibly a torch bug
98+
new_logits = torch.where(mask, self.fill_value, logits)
99+
logits.copy_(new_logits)
96100
return

0 commit comments

Comments
 (0)