2 files changed, +2 −2
File tree: modelopt/torch/_deploy/utils

@@ -123,7 +123,7 @@ def main():
         "--batch_size",
         type=int,
         default=1,
-        help="Batch size for calibration.",
+        help="Batch size for calibration and ONNX model export.",
     )
 
     args = parser.parse_args()
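For quick context, here is a minimal, runnable sketch of the updated flag in isolation, assuming a standard argparse setup (the surrounding `parser` construction is not part of this hunk and is assumed here):

```python
import argparse

# Only the --batch_size definition comes from the diff above; the
# parser construction and the example invocation are assumptions.
parser = argparse.ArgumentParser()
parser.add_argument(
    "--batch_size",
    type=int,
    default=1,
    help="Batch size for calibration and ONNX model export.",
)

args = parser.parse_args(["--batch_size", "4"])
print(args.batch_size)  # -> 4
```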

@@ -486,7 +486,7 @@ def get_onnx_bytes_and_metadata(
     param_dtype = torch.float32
     if weights_dtype in ["fp16", "bf16"] and param_dtype == torch.float32:
         if is_mxfp8_quantized(model) or is_int4_quantized(model):
-            assert weights_dtype == "fp16", "BF16 + MXFP8 mixed precision is not supported yet"
+            assert weights_dtype == "fp16", "BF16 + MXFP8/INT4 mixed precision is not supported yet"
         onnx_opt_graph = convert_float_to_float16(
             onnx_opt_graph,
             keep_io_types=False,
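A self-contained sketch of what this assertion guards, with `is_mxfp8_quantized`/`is_int4_quantized` replaced by hypothetical stubs and the guard wrapped in a hypothetical `check_mixed_precision` helper; only the condition and the message come from the hunk:

```python
import torch

# Hypothetical stubs standing in for modelopt's real helpers; the
# real checks inspect the model's quantizer configuration.
def is_mxfp8_quantized(model):
    return getattr(model, "quant_mode", None) == "mxfp8"

def is_int4_quantized(model):
    return getattr(model, "quant_mode", None) == "int4"

def check_mixed_precision(model, weights_dtype, param_dtype=torch.float32):
    # Mirrors the guard from the hunk: when exporting an fp32-parameter
    # model to fp16/bf16, MXFP8- or INT4-quantized models must use fp16.
    if weights_dtype in ["fp16", "bf16"] and param_dtype == torch.float32:
        if is_mxfp8_quantized(model) or is_int4_quantized(model):
            assert weights_dtype == "fp16", (
                "BF16 + MXFP8/INT4 mixed precision is not supported yet"
            )

class _Int4Model:
    quant_mode = "int4"

check_mixed_precision(_Int4Model(), "fp16")  # passes silently
try:
    check_mixed_precision(_Int4Model(), "bf16")
except AssertionError as e:
    print(e)  # BF16 + MXFP8/INT4 mixed precision is not supported yet
```

The widened message matches the condition that was already there: the old text mentioned only MXFP8, even though the `is_int4_quantized` branch also triggers the assertion.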