Skip to content

Commit c91893a

Browse files
committed
Add --fast_safetensors option to quantizer script
1 parent e094f3e commit c91893a

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

exllamav2/conversion/convert_exl2.py

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -31,6 +31,7 @@
3131
parser.add_argument("-ml", "--measurement_length", type = int, default = 2048, help = "Max no. tokens per sample when measuring")
3232
parser.add_argument("-so", "--status_output", action = "store_true", help = "Include machine-parseable status updates in console output")
3333
parser.add_argument("-hsol", "--hidden_state_offload_layers", type = int, default = 0, help = "Number of hidden/target states to keep in VRAM. Speed-up but increases VRAM usage")
34+
parser.add_argument("-fst", "--fast_safetensors", action = "store_true", help = "Use fast-safetensors to load layers of the unquantized model. This can help alleviate some out-of-memory issues, especially on Windows.")
3435

3536
args = parser.parse_args()
3637

@@ -112,6 +113,7 @@ def save_job():
112113
"rope_scale": args.rope_scale,
113114
"rope_alpha": args.rope_alpha,
114115
"output_measurement": output_measurement,
116+
"fast_safetensors": args.fast_safetensors,
115117
"progress": "begin"}
116118

117119
if args.measurement is not None:
@@ -160,6 +162,8 @@ def save_job():
160162
else:
161163
print(f" -- Measurement will be saved to {job['output_measurement']}")
162164
print(f" !! Conversion script will end after measurement pass")
165+
if job.get("fast_safetensors"):
166+
print(f" -- Enabled fast_safetensors option.")
163167

164168
if job['rope_scale']: print(f" -- RoPE scale: {job['rope_scale']:.2f}")
165169
if job['rope_alpha']: print(f" -- RoPE alpha: {job['rope_alpha']:.2f}")
@@ -190,6 +194,10 @@ def save_job():
190194

191195
tokenizer = ExLlamaV2Tokenizer(config)
192196

197+
# Set fast_safetensors in config
198+
199+
if job.get("fast_safetensors"): config.fasttensors = True
200+
193201
# Set scaling for input model
194202

195203
if job["rope_scale"] is not None: config.scale_pos_emb = job["rope_scale"]

0 commit comments

Comments
 (0)