diff --git a/torchchat/utils/quantize.py b/torchchat/utils/quantize.py index b90d098b3..c4ecfbe6f 100644 --- a/torchchat/utils/quantize.py +++ b/torchchat/utils/quantize.py @@ -747,7 +747,7 @@ def et_forward(self, indices: torch.Tensor) -> torch.Tensor: ) else: return torch.ops.quantized_decomposed.embedding_4bit.dtype( - self.weight, self.scales, None, 0, 0, indices, dtype=self.dtype + self.weight, self.scales, None, -8, 7, indices, dtype=self.dtype ) @torch.no_grad()