@@ -99,11 +99,15 @@ def _set_runtime_options(
9999 ],
100100 task : str ,
101101 library_name : str ,
102+ quantized_model : bool ,
102103):
103104 for model_name in models_and_export_configs .keys ():
104105 _ , sub_export_config = models_and_export_configs [model_name ]
106+ sub_export_config .runtime_options = {}
105107 if "diffusers" in library_name or "text-generation" in task :
106- sub_export_config .runtime_options = {"ACTIVATIONS_SCALE_FACTOR" : "8.0" }
108+ sub_export_config .runtime_options ["ACTIVATIONS_SCALE_FACTOR" ] = "8.0"
109+ if not quantized_model and "text-generation" in task :
110+ sub_export_config .runtime_options ["KV_CACHE_PRECISION" ] = "f16"
107111
108112
109113def _save_model (
@@ -116,8 +120,8 @@ def _save_model(
116120 compress_to_fp16 = ov_config is not None and ov_config .dtype == "fp16"
117121 model = _add_version_info_to_model (model , library_name )
118122
119- if hasattr (config , "runtime_options" ):
120- model = _add_runtime_options_to_rt_info (model , config . runtime_options )
123+ runtime_options = config . runtime_options if hasattr (config , "runtime_options" ) else {}
124+ model = _add_runtime_options_to_rt_info (model , runtime_options )
121125 save_model (model , path , compress_to_fp16 )
122126 del model
123127 gc .collect ()
@@ -755,7 +759,12 @@ def export_from_model(
755759
756760 model .save_config (output )
757761
758- _set_runtime_options (models_and_export_configs , task , library_name )
762+ _set_runtime_options (
763+ models_and_export_configs ,
764+ task ,
765+ library_name ,
766+ hasattr (ov_config , "quantization_config" ) and ov_config .quantization_config ,
767+ )
759768
760769 export_models (
761770 models_and_export_configs = models_and_export_configs ,
0 commit comments