We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
1 parent 4d1eb0c · commit c391942 (Copy full SHA for c391942)
modelopt/torch/quantization/qtensor/nvfp4_tensor.py
@@ -19,7 +19,6 @@
19
20
from ..backends.utils import fp4_compatible
21
from ..qtensor.base_qtensor import BaseQuantizedTensor
22
-from ..triton.fp4_kernel import fp4_dequantize
23
from ..utils import reduce_amax, reduce_block_amax, reduce_block_padding
24
25
# Define conversion tables
@@ -266,6 +265,8 @@ def _unpack_tensor(input: torch.Tensor):
266
265
) from e
267
268
if fast:
+ from ..triton.fp4_kernel import fp4_dequantize
269
+
270
return fp4_dequantize(
271
self._quantized_data,
272
kwarg["scale"],
0 commit comments