File tree Expand file tree Collapse file tree 1 file changed +2
-2
lines changed
Expand file tree Collapse file tree 1 file changed +2
-2
lines changed Original file line number Diff line number Diff line change @@ -427,12 +427,12 @@ def fp8_linear(self, input):
427 427     input = torch.clamp(input, min=-448, max=448, out=input)
428 428     input_fp8 = input.to(dtype).contiguous()
429 429     layout_params_input = TensorCoreFP8Layout.Params(scale=scale_input, orig_dtype=input_dtype, orig_shape=tuple(input_fp8.shape))
430     -   quantized_input = QuantizedTensor(input_fp8, TensorCoreFP8Layout, layout_params_input)
    430 +   quantized_input = QuantizedTensor(input_fp8, "TensorCoreFP8Layout", layout_params_input)
431 431
432 432     # Wrap weight in QuantizedTensor - this enables unified dispatch
433 433     # Call F.linear - __torch_dispatch__ routes to fp8_linear handler in quant_ops.py!
434 434     layout_params_weight = TensorCoreFP8Layout.Params(scale=scale_weight, orig_dtype=input_dtype, orig_shape=tuple(w.shape))
435     -   quantized_weight = QuantizedTensor(w, TensorCoreFP8Layout, layout_params_weight)
    435 +   quantized_weight = QuantizedTensor(w, "TensorCoreFP8Layout", layout_params_weight)
436 436     o = torch.nn.functional.linear(quantized_input, quantized_weight, bias)
437 437
438 438     uncast_bias_weight(self, w, bias, offload_stream)
You can’t perform that action at this time.
0 commit comments