@@ -266,6 +266,7 @@ def __init__(
266266 tokenizer : Optional [str ] = None ,
267267 processor : Optional [str ] = None ,
268268 trust_remote_code : bool = False ,
269+ weight_format : Optional [str ] = None ,
269270 ** kwargs ,
270271 ):
271272 """
@@ -279,6 +280,18 @@ def __init__(
279280 entries provided via this argument are used to create an instance of `nncf.IgnoredScope` class.
280281 num_samples (`int`, *optional*):
281282 The maximum number of samples composing the calibration dataset.
283+ dataset (`str or List[str]`, *optional*):
284+ The dataset used for data-aware optimization with NNCF.
285+ tokenizer (`str`, *optional*):
286+ The tokenizer used to process the dataset.
287+ processor (`str`, *optional*):
288+ A transformers processor used to process the dataset inputs.
289+ trust_remote_code (`bool`, defaults to `False`):
290+ Allows the use of custom code for the modeling hosted in the model repository. This option should
291+ only be set for repositories you trust and in which you have read the code, as it will execute
292+ arbitrary code present in the model repository on your local machine.
293+ weight_format (`str`, *optional*):
294+ Data format weights are compressed to.
282295 """
283296 self .bits = bits
284297 self .sym = sym
@@ -287,6 +300,7 @@ def __init__(
287300 self .tokenizer = tokenizer
288301 self .processor = processor
289302 self .trust_remote_code = trust_remote_code
303+ self .weight_format = weight_format
290304
291305 if isinstance (ignored_scope , nncf .IgnoredScope ):
292306 ignored_scope = ignored_scope .__dict__
@@ -370,7 +384,7 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
370384 scale_estimation (`bool`, *optional*):
371385 Indicates whether to apply a scale estimation algorithm that minimizes the L2 error between the original and
372386 compressed layers. Providing a dataset is required to run scale estimation.
373- weight_format (`str`, defaults to 'int' ):
387+ weight_format (`str`, *optional* ):
374388 Data format weights are compressed to. Possible values: ['int4', 'int8', 'mxfp4', 'nf4'].
375389 gptq (`bool`, *optional*):
376390 Whether to apply GPTQ algorithm. GPTQ optimizes compressed weights in a layer-wise fashion to minimize the
@@ -425,14 +439,14 @@ def __init__(
425439 tokenizer = tokenizer ,
426440 processor = processor ,
427441 trust_remote_code = trust_remote_code ,
442+ weight_format = weight_format ,
428443 )
429444 self .group_size = group_size or (- 1 if bits == 8 else 128 )
430445 self .ratio = ratio
431446 self .all_layers = all_layers
432447 self .sensitivity_metric = sensitivity_metric
433448 self .quant_method = OVQuantizationMethod (quant_method ) if isinstance (quant_method , str ) else quant_method
434449 self .scale_estimation = scale_estimation
435- self .weight_format = weight_format
436450 self .gptq = gptq
437451 self .lora_correction = lora_correction
438452 self .backup_precision = backup_precision
@@ -578,6 +592,8 @@ def __init__(
578592 processor : Optional [str ] = None ,
579593 trust_remote_code : bool = False ,
580594 smooth_quant_alpha : Optional [float ] = None ,
595+ weight_format : Optional [str ] = "int8" ,
596+ activation_format : Optional [str ] = "int8" ,
581597 ** kwargs ,
582598 ):
583599 """
@@ -621,6 +637,10 @@ def __init__(
621637 smooth_quant_alpha (`float`, *optional*):
622638 SmoothQuant alpha parameter that improves the distribution of activations before MatMul layers and
623639 reduces quantization error.
640+ weight_format (`str`, defaults to "int8"):
641+ Data format weights are quantized to. Possible values: ['int8'].
642+ activation_format (`str`, defaults to "int8"):
643+ Data format activations are compressed to. Possible values: ['int8'].
624644 """
625645 super ().__init__ (
626646 bits = bits ,
@@ -631,11 +651,13 @@ def __init__(
631651 tokenizer = tokenizer ,
632652 processor = processor ,
633653 trust_remote_code = trust_remote_code ,
654+ weight_format = weight_format ,
634655 )
635656 self .model_type = model_type
636657 self .fast_bias_correction = fast_bias_correction
637658 self .overflow_fix = overflow_fix
638659 self .smooth_quant_alpha = smooth_quant_alpha
660+ self .activation_format = activation_format
639661 self .post_init ()
640662
641663 def post_init (self ):
@@ -659,6 +681,12 @@ def post_init(self):
659681 f"SmoothQuant alpha parameter must be in range [0, 1], but found { self .smooth_quant_alpha } "
660682 )
661683
684+ if self .weight_format != "int8" :
685+ raise ValueError ("Only 'int8' weight format is currently supported." )
686+
687+ if self .activation_format != "int8" :
688+ raise ValueError ("Only 'int8' activation format is currently supported." )
689+
662690
663691class OVConfig (BaseConfig ):
664692 CONFIG_NAME = "openvino_config.json"
0 commit comments