We read every piece of feedback and take your input very seriously.
To see all available qualifiers, see our documentation.
1 parent e48d9db, commit 6d7a103 (copy full SHA for 6d7a103)
examples/sparse_2of4_quantization_fp8/llama3_8b_2of4.py
@@ -116,5 +116,7 @@ def get_recipe(fp8_enabled):
116
print("==========================================\n")
117
118
# Save compressed model and tokenizer
119
-model.save_pretrained(save_dir, save_compressed=args.fp8)
+model.save_pretrained(
120
+ save_dir, save_compressed=args.fp8, disable_sparse_compression=True
121
+)
122
tokenizer.save_pretrained(save_dir)
0 commit comments