We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 358c328, commit 4934d49. Copy full SHA for 4934d49
vllm/model_executor/models/gpt_neox.py
@@ -54,6 +54,7 @@ def __init__(
54
self.total_num_heads = config.num_attention_heads
55
self.hidden_size = config.hidden_size
56
self.head_size = self.hidden_size // self.total_num_heads
57
+ self.bias = getattr(config, "attention_bias", True)
58
59
tensor_model_parallel_world_size = (
60
get_tensor_model_parallel_world_size())
@@ -65,11 +66,13 @@ def __init__(
65
66
config.hidden_size,
67
self.head_size,
68
self.total_num_heads,
69
+ bias=self.bias,
70
linear_method=linear_method,
71
)
72
self.dense = RowParallelLinear(
73
74
75
76
77
78
scaling = self.head_size**-0.5
0 commit comments