We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 6db9457 · commit b5269db — Copy full SHA for b5269db
vllm/model_executor/layers/quantization/kernels/mixed_precision/marlin.py
@@ -115,10 +115,6 @@ def apply_weights(self,
115
layer: torch.nn.Module,
116
x: torch.Tensor,
117
bias: Optional[torch.Tensor] = None) -> torch.Tensor:
118
- # marlin requires contiguous memory layout
119
- # prefix caching may cause x to be non-contiguous
120
- x = x.contiguous() # no-op if already contiguous
121
-
122
c = self.config
123
w_q, w_s, w_zp, w_gidx = self._get_weight_params(layer)
124
0 commit comments