@@ -1,3 +1,4 @@
+ import os
import shutil
import sys
import unittest
@@ -11,6 +12,17 @@
from auto_round import AutoRound


+ def _get_folder_size(path: str) -> float:
+     """Return folder size in GB."""
+     total_size = 0
+     for dirpath, _, filenames in os.walk(path):
+         for f in filenames:
+             fp = os.path.join(dirpath, f)
+             if os.path.isfile(fp):
+                 total_size += os.path.getsize(fp)
+     return total_size / (1024**3)  # convert bytes to GB
+
+
class LLMDataLoader:
    def __init__(self):
        self.batch_size = 1
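A quick way to sanity-check the new helper outside the test run (a minimal sketch, not part of this diff; the scratch directory and file size are made up for illustration, and `_get_folder_size` is assumed to be the function defined above):

import os
import tempfile

# Write 1 MiB into a scratch directory and confirm the helper reports
# roughly 1 MiB expressed in GB (1 / 1024**2 is about 0.00095).
with tempfile.TemporaryDirectory() as tmpdir:
    with open(os.path.join(tmpdir, "blob.bin"), "wb") as f:
        f.write(b"\0" * (1024 * 1024))
    print(_get_folder_size(tmpdir))  # ~0.00095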
@@ -25,7 +37,7 @@ class TestAutoRound(unittest.TestCase):
    def setUpClass(self):
        model_name = "facebook/opt-125m"
        self.save_dir = "./saved"
-         self.model = AutoModelForCausalLM.from_pretrained(model_name, dtype="auto", trust_remote_code=True)
+         self.model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
        self.tokenizer = AutoTokenizer.from_pretrained(model_name, trust_remote_code=True)
        self.llm_dataloader = LLMDataLoader()
@@ -268,10 +280,7 @@ def test_mxfp4_llmcompressor_format(self):
        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
        from transformers import AutoConfig

-         bits = 4
-         data_type = "mx_fp"
-         group_size = 32
-         sym = True
+         scheme = "MXFP4"
        layer_config = {}
        fp_layers_str = "k_proj"
        from auto_round.utils import get_fp_layer_names
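For readers new to the `scheme` argument: the predefined scheme string stands in for the four explicit knobs deleted above (bits=4, data_type="mx_fp", group_size=32, sym=True). A minimal sketch of the new-style call, assuming only what this diff shows about the API:

from transformers import AutoModelForCausalLM, AutoTokenizer
from auto_round import AutoRound

model_name = "facebook/opt-125m"
model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto")
tokenizer = AutoTokenizer.from_pretrained(model_name)

# One scheme string instead of bits / data_type / group_size / sym.
autoround = AutoRound(model, tokenizer, scheme="MXFP4", iters=2, seqlen=2)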
@@ -282,12 +291,58 @@ def test_mxfp4_llmcompressor_format(self):
        autoround = AutoRound(
            model,
            self.tokenizer,
-             bits=bits,
-             group_size=group_size,
-             sym=sym,
+             scheme=scheme,
            iters=2,
            seqlen=2,
-             data_type=data_type,
+             layer_config=layer_config,
+             dataset=self.llm_dataloader,
+         )
+         quantized_model_path = self.save_dir
+         autoround.quantize()
+         compressed_model = autoround.save_quantized(
+             output_dir=quantized_model_path, inplace=True, format="llm_compressor"
+         )
+         tmp_layer = compressed_model.model.decoder.layers[3].self_attn.q_proj
+         skip_layer = compressed_model.model.decoder.layers[3].self_attn.k_proj
+         assert (
+             hasattr(tmp_layer, "weight_scale")
+             and hasattr(tmp_layer, "weight_packed")
+             and tmp_layer.weight_scale.dtype is torch.uint8
+             and tmp_layer.weight_scale.shape[0] == 768
+         ), "Illegal MXFP4 packing name or data_type or shape"
+         assert not hasattr(skip_layer, "weight_scale") and not hasattr(  # check skipped layers
+             skip_layer, "weight_packed"
+         ), "Illegal MXFP4 quantization for fp_layers"
+         quantization_config = AutoConfig.from_pretrained(
+             quantized_model_path, trust_remote_code=True
+         ).quantization_config
+         assert (
+             quantization_config["format"] == "float-quantized"
+             and quantization_config["config_groups"]["group_0"]["weights"]["is_mx"] is True
+             and quantization_config["config_groups"]["group_0"]["weights"]["num_bits"] == 4
+         ), f"Invalid MXFP4 quantization configuration: {quantization_config}"
+
+         shutil.rmtree("./saved", ignore_errors=True)
+
+     def test_rtn_mxfp4_llmcompressor_format(self):
+         model_name = "facebook/opt-125m"
+         model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
+         from transformers import AutoConfig
+
+         scheme = "MXFP4"
+         layer_config = {}
+         fp_layers_str = "k_proj"
+         from auto_round.utils import get_fp_layer_names
+
+         not_quantize_layer_names = get_fp_layer_names(model, fp_layers_str)
+         for name in not_quantize_layer_names:
+             layer_config[name] = {"bits": 16, "act_bits": 16, "data_type": "float"}
+         autoround = AutoRound(
+             model,
+             self.tokenizer,
+             scheme=scheme,
+             iters=0,
+             seqlen=2,
            layer_config=layer_config,
            dataset=self.llm_dataloader,
        )
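Two notes on the hunk above. The shape and dtype checks are consistent with a group-wise MXFP4 layout; the back-of-envelope sketch below spells that out (the packed layout is an assumption on my part, the test itself only pins weight_scale.dtype is torch.uint8 and weight_scale.shape[0] == 768):

# opt-125m attention projections are 768x768. With 4-bit weights packed two per
# uint8 byte and one shared uint8 (E8M0) exponent per group of 32 weights:
out_features, in_features, group_size = 768, 768, 32
weight_packed_shape = (out_features, in_features // 2)          # (768, 384)
weight_scale_shape = (out_features, in_features // group_size)  # (768, 24)
print(weight_packed_shape, weight_scale_shape)

Also, the new test_rtn_mxfp4_llmcompressor_format passes iters=0, which presumably disables the AutoRound tuning loop and falls back to plain round-to-nearest (RTN) quantization, hence the test name.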
@@ -322,19 +377,13 @@ def test_mxfp8_llmcompressor_format(self):
        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
        from transformers import AutoConfig

-         bits = 8
-         data_type = "mx_fp_rceil"
-         group_size = 32
-         sym = True
+         scheme = "MXFP8"
        autoround = AutoRound(
            model,
            self.tokenizer,
-             bits=bits,
-             group_size=group_size,
-             sym=sym,
+             scheme=scheme,
            iters=2,
            seqlen=2,
-             data_type=data_type,
            dataset=self.llm_dataloader,
        )
        quantized_model_path = self.save_dir
@@ -355,28 +404,23 @@ def test_mxfp8_llmcompressor_format(self):
            and quantization_config["config_groups"]["group_0"]["weights"]["is_mx"] is True
            and quantization_config["config_groups"]["group_0"]["weights"]["num_bits"] == 8
        ), f"Invalid MXFP8 quantization configuration: {quantization_config}"
+         folder_size_gb = _get_folder_size(quantized_model_path)
+         # Original opt-125m is < 0.5 GB -> quantized mxfp8 model should be smaller but not empty
+         assert (
+             0.15 < folder_size_gb < 0.2
+         ), f"Quantized model folder size {folder_size_gb:.2f} GB is outside the expected range (0.15~0.2 GB)"
        shutil.rmtree("./saved", ignore_errors=True)

    def test_nvfp4_llmcompressor_format(self):
        model_name = "facebook/opt-125m"
        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
        from transformers import AutoConfig

-         bits = 4
-         act_bits = 4
-         data_type = "nv_fp"
-         act_data_type = "nv_fp4_with_static_gs"
-         group_size = 16
-         sym = True
+         scheme = "NVFP4"
        autoround = AutoRound(
            model,
            self.tokenizer,
-             bits=bits,
-             act_bits=act_bits,
-             data_type=data_type,
-             act_data_type=act_data_type,
-             group_size=group_size,
-             sym=sym,
+             scheme=scheme,
            iters=2,
            seqlen=2,
            dataset=self.llm_dataloader,
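The 0.15~0.2 GB window asserted above (and the 0.1~0.15 GB window added for NVFP4 in the next hunk) roughly matches a back-of-envelope size estimate for opt-125m. A sketch of that arithmetic, assuming the decoder linear weights are quantized while embeddings and other parameters stay in 16-bit (parameter counts are approximate):

GB = 1024**3
linear_params = 12 * (4 * 768 * 768 + 2 * 768 * 3072)  # ~85M weights in decoder linears
other_params = 50272 * 768 + 2048 * 768                # ~40M, mostly token/position embeddings

mxfp8_gb = (linear_params * 1 + linear_params / 32 + other_params * 2) / GB    # ~0.16
nvfp4_gb = (linear_params * 0.5 + linear_params / 16 + other_params * 2) / GB  # ~0.12
print(round(mxfp8_gb, 2), round(nvfp4_gb, 2))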
@@ -399,28 +443,23 @@ def test_nvfp4_llmcompressor_format(self):
            quantization_config["format"] == "nvfp4-pack-quantized"
            and quantization_config["config_groups"]["group_0"]["input_activations"]["num_bits"] == 4
        ), f"Invalid NVFP4 quantization configuration: {quantization_config}"
+         folder_size_gb = _get_folder_size(quantized_model_path)
+         # Original opt-125m is < 0.5 GB -> quantized nvfp4 model should be smaller but not empty
+         assert (
+             0.1 < folder_size_gb < 0.15
+         ), f"Quantized model folder size {folder_size_gb:.2f} GB is outside the expected range (0.1~0.15 GB)"
        shutil.rmtree("./saved", ignore_errors=True)

    def test_nvfp4_autoround_format(self):
        model_name = "facebook/opt-125m"
        model = AutoModelForCausalLM.from_pretrained(model_name, torch_dtype="auto", trust_remote_code=True)
        from transformers import AutoConfig

-         bits = 4
-         act_bits = 4
-         data_type = "nv_fp"
-         act_data_type = "nv_fp4_with_static_gs"
-         group_size = 16
-         sym = True
+         scheme = "NVFP4"
        autoround = AutoRound(
            model,
            self.tokenizer,
-             bits=bits,
-             act_bits=act_bits,
-             data_type=data_type,
-             act_data_type=act_data_type,
-             group_size=group_size,
-             sym=sym,
+             scheme="NVFP4",
            iters=2,
            seqlen=2,
            dataset=self.llm_dataloader,
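As a closing reference, the three scheme strings used in these tests map onto the explicit parameter sets they replace elsewhere in this diff. The mapping below is collected from the deleted lines only; it summarizes the diff rather than the library's authoritative scheme definitions:

# scheme string -> explicit parameters removed by this PR
SCHEME_PARAMS = {
    "MXFP4": {"bits": 4, "data_type": "mx_fp", "group_size": 32, "sym": True},
    "MXFP8": {"bits": 8, "data_type": "mx_fp_rceil", "group_size": 32, "sym": True},
    "NVFP4": {
        "bits": 4,
        "act_bits": 4,
        "data_type": "nv_fp",
        "act_data_type": "nv_fp4_with_static_gs",
        "group_size": 16,
        "sym": True,
    },
}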