@@ -1091,7 +1091,12 @@ def get_quant_config(
             or hasattr(module, quantizer_attr_names(weight_name).input_quantizer)
             for weight_name in weight_names
         )
-        if has_quantizers:
+
+        # Skip LORA module and adapters.
+        # ModelOpt does not currently quantize these layers in QLoRA path.
+        is_lora = hasattr(module, "base_layer") or "lora_A" in name or "lora_B" in name
+
+        if has_quantizers and not is_lora:
             quantization_format = get_quantization_format(module)
 
             # For MoE expert modules, we need to extract block size from the correct weight quantizer
@@ -1102,24 +1107,15 @@ def get_quant_config(
                 weight_block_size = get_weight_block_size(module, weight_name)
                 if weight_block_size > 0:
                     block_size = weight_block_size
-                    weight_quantizer_enabled = True
                     break
 
             # Fallback to default weight quantizer if no specific weight quantizer found
             if block_size == 0:
                 block_size = get_weight_block_size(module)
-            weight_quantizer = getattr(
-                module, quantizer_attr_names("weight").weight_quantizer, None
-            )
-            # Check if weight_quantizer is enabled
-            weight_quantizer_enabled = block_size > 0 or (
-                weight_quantizer is not None and weight_quantizer.is_enabled
-            )
 
-            if weight_quantizer_enabled:
-                # Construct per layer config dictionary
-                layer_config_dict[name + ".quantization"] = quantization_format
-                layer_config_dict[name + ".awq_block_size"] = block_size
+            # Construct per layer config dictionary
+            layer_config_dict[name + ".quantization"] = quantization_format
+            layer_config_dict[name + ".awq_block_size"] = block_size
 
         # Find kv cache quant format
         if (
0 commit comments