Skip to content

Commit c5a04b8

Browse files
committed
Always pass the weight tensor to avoid allocating on the hot path
Signed-off-by: Luka Govedič <lgovedic@redhat.com>
1 parent 1ae94ed commit c5a04b8

File tree

1 file changed

+2
-4
lines changed

1 file changed

+2
-4
lines changed

vllm/model_executor/layers/layernorm.py

Lines changed: 2 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -271,10 +271,9 @@ def forward_native(
271271
) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
272272
"""PyTorch-native implementation equivalent to forward()."""
273273
add_residual = residual is not None
274-
weight = self.weight.data if self.has_weight else None
275274
if not add_residual:
276275
return ir.ops.rms_norm(
277-
x, weight, self.variance_epsilon, self.variance_size_override
276+
x, self.weight, self.variance_epsilon, self.variance_size_override
278277
)
279278

280279
return self.forward_static(
@@ -293,10 +292,9 @@ def forward_cuda(
293292
residual: torch.Tensor | None = None,
294293
) -> torch.Tensor | tuple[torch.Tensor, torch.Tensor]:
295294
add_residual = residual is not None
296-
weight = self.weight.data if self.has_weight else None
297295
if not add_residual and not vllm_is_batch_invariant():
298296
return ir.ops.rms_norm(
299-
x, weight, self.variance_epsilon, self.variance_size_override
297+
x, self.weight, self.variance_epsilon, self.variance_size_override
300298
)
301299

302300
if self.variance_size_override is not None:

0 commit comments

Comments (0)