@@ -278,7 +278,7 @@ def tl_matmul_chunk_truncate(
     activation="",
     chunk_trun_bits=0,
     chunk_size=16,
-    cast_output_to_input_dtype=True,
+    cast_output_to_input_dtype=None,
 ):
     """Triton matmul for HW behavior simulation. Supports float and int8.
         a. variable chunk size (i.e., BLOCK_SIZE_K)
@@ -291,8 +291,7 @@ def tl_matmul_chunk_truncate(
         chunk_size (int, optional): BLOCK_SIZE_K, some HW has specific chunk size. must >= 16.
         cast_output_to_input_dtype (bool, optional): accumulator has higher prec than input, usually
                                                      FP32 or INT32. by default we cast the final
-                                                     output to the same dtype as input, but can be
-                                                     changed if needed.
+                                                     output to the same dtype as input for non-8bits.
 
     Returns:
         _type_: _description_
@@ -306,6 +305,8 @@ def tl_matmul_chunk_truncate(
     assert a.is_contiguous(), "Matrix A must be contiguous"
     assert a.dtype == b.dtype, "Input dtypes inconsistent"
 
+    if cast_output_to_input_dtype is None:
+        cast_output_to_input_dtype = a.dtype not in DTYPE_8BIT
     allowed_dtypes = [torch.float, torch.bfloat16, torch.float16]
     cuda_cc = torch.cuda.get_device_capability()
     if cuda_cc[0] >= 8:
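
For context, a minimal sketch of the None-means-auto default introduced above. `DTYPE_8BIT` and `resolve_cast_flag` here are stand-ins for illustration only, not the repo's actual definitions (in the source, `DTYPE_8BIT` is defined elsewhere in the module):

```python
import torch

# Assumption: stub for the repo's DTYPE_8BIT collection of 8-bit dtypes.
DTYPE_8BIT = (torch.int8,)

def resolve_cast_flag(a, cast_output_to_input_dtype=None):
    """Return True when the high-precision accumulator (FP32/INT32) should be
    cast back to the input dtype, mirroring the None-means-auto default."""
    if cast_output_to_input_dtype is None:
        # Auto mode: 16/32-bit inputs are cast back to their own dtype,
        # while 8-bit inputs keep the accumulator's precision.
        cast_output_to_input_dtype = a.dtype not in DTYPE_8BIT
    return cast_output_to_input_dtype

# fp16 input -> cast back to fp16; int8 input -> keep the wider output dtype.
print(resolve_cast_flag(torch.empty(2, 2, dtype=torch.float16)))  # True
print(resolve_cast_flag(torch.empty(2, 2, dtype=torch.int8)))     # False
```

This keeps the previous behavior for float inputs (an explicit `True` is no longer needed) while changing only the 8-bit default.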