
Commit 84b985e

Fix README example: use randn for attn_bias instead of ones
1 parent 964973e commit 84b985e

File tree

1 file changed: +1 −1

README.md

Lines changed: 1 addition & 1 deletion
```diff
@@ -169,7 +169,7 @@ key = torch.randn(batch_size, seq_len, num_kv_heads, head_dim, device=device, dt
 value = torch.randn(batch_size, seq_len, num_kv_heads, head_dim, device=device, dtype=dtype)

 # Create bias for sparse attention
-attn_bias = torch.ones(batch_size, num_kv_heads, seq_len, seq_len, device=device, dtype=dtype)
+attn_bias = torch.randn(batch_size, num_kv_heads, seq_len, seq_len, device=device, dtype=dtype)

 # Generate dynamic mask based on bias
 if seq_len > window_size:
```
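For context, the change matters because `torch.ones` gives every position an identical bias, so any bias-driven mask selection would be degenerate; `torch.randn` gives each position a distinct score. A minimal sketch of the surrounding README example, assuming its variable names; the top-k selection inside the `if seq_len > window_size:` branch is an illustrative assumption, not the README's exact code:

```python
import torch

# Assumed shapes/values for a self-contained sketch.
batch_size, seq_len, num_kv_heads, head_dim = 1, 256, 2, 64
window_size = 128
device, dtype = "cpu", torch.float32

# randn gives each key position a distinct bias score; with torch.ones
# every position would tie and a top-k window would be meaningless.
attn_bias = torch.randn(batch_size, num_kv_heads, seq_len, seq_len,
                        device=device, dtype=dtype)

# Generate dynamic mask based on bias (illustrative top-k windowing).
if seq_len > window_size:
    # For each query row, keep only the window_size highest-bias keys.
    topk = torch.topk(attn_bias, window_size, dim=-1)
    attn_mask = torch.zeros_like(attn_bias).scatter_(-1, topk.indices, 1.0)
else:
    attn_mask = torch.ones_like(attn_bias)
```

Each query row of `attn_mask` then contains exactly `window_size` ones, selecting the sparse attention pattern.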
