1 parent 1705501 commit 68c5401
vllm_ascend/spec_decode/eagle_proposer.py
@@ -1,5 +1,4 @@
 # SPDX-License-Identifier: Apache-2.0
-import os
 from typing import Optional
 
 import numpy as np
@@ -72,8 +71,7 @@ def __init__(self,
                                            1,
                                            device=device,
                                            dtype=torch.int32)
-        attn_mask_len = min(self.vllm_config.model_config.max_model_len,
-                            int(os.getenv("PAGED_ATTENTION_MASK_LEN", 10000)))
+        attn_mask_len = self.vllm_config.model_config.max_model_len
         self.attn_mask_builder = AttentionMaskBuilder(
             attn_mask_len, self.vllm_config.model_config.dtype)
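A minimal sketch of what this change amounts to; the names mirror the diff, while the concrete max_model_len value and the print statements are made up for illustration:

import os

# Stand-in for self.vllm_config.model_config.max_model_len (assumed value for illustration).
max_model_len = 32768

# Before this commit: the attention-mask length was capped by the
# PAGED_ATTENTION_MASK_LEN environment variable (default 10000).
old_attn_mask_len = min(max_model_len,
                        int(os.getenv("PAGED_ATTENTION_MASK_LEN", 10000)))

# After this commit: the mask length simply follows the model's maximum
# context length, so the environment variable (and `import os`) is no longer needed.
new_attn_mask_len = max_model_len

print(old_attn_mask_len)  # 10000 unless PAGED_ATTENTION_MASK_LEN is set higher
print(new_attn_mask_len)  # 32768

The resulting attn_mask_len is what gets passed to AttentionMaskBuilder together with the model dtype, as shown in the diff.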