2 files changed: +4 −4 lines changed

@@ -252,7 +252,7 @@ def flash_dynamic_mask_attention_forward(
         softcap: Softcap value for attention scores
         **kwargs: Additional arguments including:
             - is_causal: Whether to apply causal mask
-            - keep_window_size: Size of window to keep
+            - window_size: Size of window to keep
             - layer_idx: Layer index for logging
             - implementation: Implementation to use ("flash_dmattn" or None)
@@ -279,7 +279,7 @@ class DynamicMaskAttention(nn.Module):
         self.num_key_value_groups = config.num_attention_heads // config.num_key_value_heads
         self.scaling = self.head_dim**-0.5
         self.attention_dropout = config.attention_dropout
-        self.keep_window_size = config.keep_window_size
+        self.window_size = config.window_size
         self.is_causal = True

         self.q_proj = nn.Linear(
@@ -251,7 +251,7 @@ def flash_dynamic_mask_attention_forward(
         softcap: Softcap value for attention scores
         **kwargs: Additional arguments including:
             - is_causal: Whether to apply causal mask
-            - keep_window_size: Size of window to keep
+            - window_size: Size of window to keep
             - layer_idx: Layer index for logging
             - implementation: Implementation to use ("flash_dmattn" or None)
@@ -278,7 +278,7 @@ class DynamicMaskAttention(nn.Module):
         self.num_key_value_groups = config.num_attention_heads // config.num_key_value_heads
         self.scaling = self.head_dim**-0.5
         self.attention_dropout = config.attention_dropout
-        self.keep_window_size = config.keep_window_size
+        self.window_size = config.window_size
         self.is_causal = True

         self.q_proj = nn.Linear(
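
For context on what the renamed parameter controls, below is a minimal, hypothetical sketch of a keep-window mask over dense attention scores. The docstring describes window_size as the "size of window to keep", so the sketch retains the top window_size key positions per query and masks the rest. The helper build_dynamic_mask is invented here for illustration; it is not the flash_dmattn kernel or any function in this diff, and the actual library may apply the mask inside a fused kernel rather than materializing it.

import torch

def build_dynamic_mask(scores: torch.Tensor, window_size: int) -> torch.Tensor:
    # Keep at most `window_size` key positions per query row of a
    # (batch, heads, q_len, k_len) score tensor; mask the rest to -inf.
    # NOTE: illustrative sketch only, not the flash_dmattn implementation.
    if window_size >= scores.size(-1):
        return scores  # window covers every key, nothing to drop
    keep = scores.topk(window_size, dim=-1).indices
    mask = torch.full_like(scores, float("-inf"))
    mask.scatter_(-1, keep, 0.0)  # 0 at kept positions, -inf elsewhere
    return scores + mask

# Usage: the renamed attribute would feed straight into the mask step.
scores = torch.randn(1, 2, 4, 16)      # (batch, heads, q_len, k_len)
masked = build_dynamic_mask(scores, window_size=8)
probs = torch.softmax(masked, dim=-1)  # pruned keys receive zero weight

Under this reading, the diff is a pure rename from keep_window_size to window_size: both sides carry the same description, and the value still bounds how many key positions each query may attend to.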