
Commit 2c70d7a

Update auto_fp8/quantize.py

1 parent 249902a

1 file changed (+1, -1)

auto_fp8/quantize.py (1 addition, 1 deletion)
```diff
@@ -81,7 +81,7 @@ def fp8_gemm(A, A_scale, B, B_scale, bias, out_dtype):
             bias=bias,
         )
         if need_reshape:
-            output = output.reshape((batch_size, *output.shape))
+            output = output.reshape((batch_size, output.shape[0] // batch_size, output.shape[1]))
     else:
         output = torch.nn.functional.linear(
             A.to(out_dtype) * A_scale,
```
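Why this one-line change matters: before the scaled GEMM, a 3-D activation is typically flattened to 2-D, so afterwards the flat output must be split back into its batch and sequence axes. The old expression prepended a new batch axis in front of the existing flat shape instead of splitting it, requesting a tensor with `batch_size` times as many elements. Below is a minimal sketch of the shape arithmetic using NumPy (whose `reshape` semantics match `torch.Tensor.reshape` here); the shapes and variable names are illustrative assumptions, not the actual AutoFP8 call site.

```python
import numpy as np

# Assumed setup: a (batch, seq, hidden) input was flattened to 2-D before the
# GEMM, so the GEMM output is (batch_size * seq_len, out_features).
batch_size, seq_len, out_features = 2, 3, 4
output = np.zeros((batch_size * seq_len, out_features))  # flat GEMM result

# Old code: output.reshape((batch_size, *output.shape)) asks for shape
# (2, 6, 4), i.e. 48 elements from a 24-element tensor -- reshape fails.
try:
    output.reshape((batch_size, *output.shape))
except ValueError:
    pass  # cannot reshape: element count does not match

# Fixed code: split the flattened rows back into (batch, seq, features).
restored = output.reshape((batch_size, output.shape[0] // batch_size, output.shape[1]))
assert restored.shape == (batch_size, seq_len, out_features)
```

The fix recovers the sequence dimension as `output.shape[0] // batch_size`, so the reshape is element-count preserving and the caller gets back a 3-D tensor matching the original input layout.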
