2626from optimum .configuration_utils import BaseConfig
2727
2828from ..utils .import_utils import is_nncf_available
29- from .utils import PREDEFINED_SD_DATASETS , PREDEFINED_VISUAL_LM_DATASETS
29+ from .utils import PREDEFINED_SD_DATASETS , PREDEFINED_SPEECH_TO_TEXT_DATASETS , PREDEFINED_VISUAL_LM_DATASETS
3030
3131
3232if is_nncf_available ():
@@ -255,6 +255,10 @@ def __init__(
255255 sym : bool = False ,
256256 ignored_scope : Optional [dict ] = None ,
257257 num_samples : Optional [int ] = None ,
258+ dataset : Optional [Optional [Union [str , List [str ]]]] = None ,
259+ tokenizer : Optional [str ] = None ,
260+ processor : Optional [str ] = None ,
261+ trust_remote_code : bool = False ,
258262 ** kwargs ,
259263 ):
260264 """
@@ -272,6 +276,10 @@ def __init__(
272276 self .bits = bits
273277 self .sym = sym
274278 self .num_samples = num_samples
279+ self .dataset = dataset
280+ self .tokenizer = tokenizer
281+ self .processor = processor
282+ self .trust_remote_code = trust_remote_code
275283
276284 if isinstance (ignored_scope , nncf .IgnoredScope ):
277285 ignored_scope = ignored_scope .__dict__
@@ -313,6 +321,10 @@ class OVWeightQuantizationConfig(OVQuantizationConfigBase):
313321 user or organization name, like `dbmdz/bert-base-german-cased`.
314322 - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
315323 using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
324+ trust_remote_code (`bool`, defaults to `False`):
325+ Allows the use of custom modeling code hosted in the model repository. This option should only be set
326+ for repositories you trust and in which you have read the code, as it will execute on your local machine
327+ arbitrary code present in the model repository.
316328 dataset (`str or List[str]`, *optional*):
317329 The dataset used for data-aware compression with NNCF.
318330 - For language models you can provide your own dataset in a list of strings or just use one from the list
@@ -395,10 +407,16 @@ def __init__(
395407 backup_precision : Optional [str ] = None ,
396408 ** kwargs ,
397409 ):
398- super ().__init__ (bits = bits , sym = sym , ignored_scope = ignored_scope , num_samples = num_samples )
399- self .tokenizer = tokenizer
400- self .trust_remote_code = trust_remote_code
401- self .dataset = dataset
410+ super ().__init__ (
411+ bits = bits ,
412+ sym = sym ,
413+ ignored_scope = ignored_scope ,
414+ num_samples = num_samples ,
415+ dataset = dataset ,
416+ tokenizer = tokenizer ,
417+ processor = processor ,
418+ trust_remote_code = trust_remote_code ,
419+ )
402420 self .group_size = group_size or (- 1 if bits == 8 else 128 )
403421 self .ratio = ratio
404422 self .all_layers = all_layers
@@ -407,7 +425,6 @@ def __init__(
407425 self .scale_estimation = scale_estimation
408426 self .weight_format = weight_format
409427 self .gptq = gptq
410- self .processor = processor
411428 self .lora_correction = lora_correction
412429 self .backup_precision = backup_precision
413430 self .post_init ()
@@ -535,6 +552,11 @@ def __init__(
535552 model_type : str = "transformer" ,
536553 fast_bias_correction : bool = True ,
537554 overflow_fix : str = "disable" ,
555+ dataset : Optional [str ] = None ,
556+ tokenizer : Optional [str ] = None ,
557+ processor : Optional [str ] = None ,
558+ trust_remote_code : bool = False ,
559+ smooth_quant_alpha : Optional [float ] = None ,
538560 ** kwargs ,
539561 ):
540562 """
@@ -557,11 +579,42 @@ def __init__(
557579 Whether to apply fast or full bias correction algorithm.
558580 overflow_fix (`str`, default to "disable"):
559581 Parameter for controlling overflow fix setting.
582+ dataset (`str`, *optional*):
583+ The dataset used for quantization. For speech-to-text model quantization the allowed value is 'librispeech'.
584+ tokenizer (`str`, *optional*):
585+ The tokenizer used to process the dataset. You can pass either:
586+ - A string, the *model id* of a predefined tokenizer hosted inside a model repo on huggingface.co.
587+ Valid model ids can be located at the root-level, like `bert-base-uncased`, or namespaced under a
588+ user or organization name, like `dbmdz/bert-base-german-cased`.
589+ - A path to a *directory* containing vocabulary files required by the tokenizer, for instance saved
590+ using the [`~PreTrainedTokenizer.save_pretrained`] method, e.g., `./my_model_directory/`.
591+ processor (`str`, *optional*):
592+ A transformers processor used to process inputs for multi-modal models. You can pass either:
593+ - A string, the *model id* of a predefined processor hosted inside a model repo on huggingface.co.
594+ - A path to a *directory* containing files required by the processor, for instance saved
595+ using the [`~AutoProcessor.save_pretrained`] method, e.g., `./my_model_directory/`.
596+ trust_remote_code (`bool`, defaults to `False`):
597+ Allows the use of custom modeling code hosted in the model repository. This option should only be set
598+ for repositories you trust and in which you have read the code, as it will execute on your local machine
599+ arbitrary code present in the model repository.
600+ smooth_quant_alpha (`float`, *optional*):
601+ SmoothQuant alpha parameter that improves the distribution of activations before MatMul layers and
602+ reduces quantization error.
560603 """
561- super ().__init__ (bits = bits , sym = sym , ignored_scope = ignored_scope , num_samples = num_samples )
604+ super ().__init__ (
605+ bits = bits ,
606+ sym = sym ,
607+ ignored_scope = ignored_scope ,
608+ num_samples = num_samples ,
609+ dataset = dataset ,
610+ tokenizer = tokenizer ,
611+ processor = processor ,
612+ trust_remote_code = trust_remote_code ,
613+ )
562614 self .model_type = model_type
563615 self .fast_bias_correction = fast_bias_correction
564616 self .overflow_fix = overflow_fix
617+ self .smooth_quant_alpha = smooth_quant_alpha
565618 self .post_init ()
566619
567620 def post_init (self ):
@@ -573,6 +626,18 @@ def post_init(self):
573626 if self .bits != 8 :
574627 raise ValueError (f"Only support 8-bit for static quantization but found { self .bits } " )
575628
629+ if self .dataset is not None :
630+ if self .dataset not in PREDEFINED_SPEECH_TO_TEXT_DATASETS :
631+ raise ValueError (
632+ f"You have entered the following string value for dataset: { self .dataset } . But it is not supported."
633+ f" Currently you can only choose { list (PREDEFINED_SPEECH_TO_TEXT_DATASETS .keys ())} ."
634+ )
635+
636+ if self .smooth_quant_alpha is not None and not (0 <= self .smooth_quant_alpha <= 1 ):
637+ raise ValueError (
638+ f"SmoothQuant alpha parameter must be in range [0, 1], but found { self .smooth_quant_alpha } "
639+ )
640+
576641
577642class OVConfig (BaseConfig ):
578643 CONFIG_NAME = "openvino_config.json"
0 commit comments