@@ -702,7 +702,11 @@ def _prepare_for_llama_export(llm_config: LlmConfig) -> LLMEdgeManager:
702702 checkpoint = llm_config .base .checkpoint ,
703703 checkpoint_dtype = DType .from_torch_dtype (checkpoint_dtype ), # type: ignore
704704 tokenizer_path = llm_config .base .tokenizer_path ,
705- use_spin_quant = llm_config .quantization .use_spin_quant .value if llm_config .quantization .use_spin_quant else None ,
705+ use_spin_quant = (
706+ llm_config .quantization .use_spin_quant .value
707+ if llm_config .quantization .use_spin_quant
708+ else None
709+ ),
706710 embedding_quantize = llm_config .quantization .embedding_quantize ,
707711 use_shared_embedding = llm_config .model .use_shared_embedding ,
708712 quantization_mode = llm_config .quantization .qmode ,
@@ -726,7 +730,9 @@ def _prepare_for_llama_export(llm_config: LlmConfig) -> LLMEdgeManager:
726730 vulkan = llm_config .backend .vulkan .enabled ,
727731 use_qat = llm_config .quantization .use_qat ,
728732 use_lora = llm_config .base .use_lora ,
729- preq_mode = llm_config .base .preq_mode .value if llm_config .base .preq_mode else None ,
733+ preq_mode = (
734+ llm_config .base .preq_mode .value if llm_config .base .preq_mode else None
735+ ),
730736 preq_group_size = llm_config .base .preq_group_size ,
731737 preq_embedding_quantize = llm_config .base .preq_embedding_quantize ,
732738 local_global_attention = llm_config .model .local_global_attention ,
@@ -738,7 +744,12 @@ def _prepare_for_llama_export(llm_config: LlmConfig) -> LLMEdgeManager:
738744
739745def get_quantizer_and_quant_params (llm_config ):
740746 pt2e_quant_params = get_pt2e_quantization_params (
741- llm_config .quantization .pt2e_quantize .value if llm_config .quantization .pt2e_quantize else None , llm_config .quantization .qmode
747+ (
748+ llm_config .quantization .pt2e_quantize .value
749+ if llm_config .quantization .pt2e_quantize
750+ else None
751+ ),
752+ llm_config .quantization .qmode ,
742753 )
743754 quantizers = get_pt2e_quantizers (pt2e_quant_params , llm_config .export .so_library )
744755 quant_dtype = None
@@ -750,13 +761,17 @@ def get_quantizer_and_quant_params(llm_config):
750761 quantizers .append (qnn_quantizer )
751762 if llm_config .backend .coreml .enabled and llm_config .quantization .pt2e_quantize :
752763 assert len (quantizers ) == 0 , "Should not enable both xnnpack / qnn and coreml"
753- coreml_quantizer = get_coreml_quantizer (llm_config .quantization .pt2e_quantize .value )
764+ coreml_quantizer = get_coreml_quantizer (
765+ llm_config .quantization .pt2e_quantize .value
766+ )
754767 quantizers .append (coreml_quantizer )
755768 if llm_config .backend .vulkan .enabled and llm_config .quantization .pt2e_quantize :
756769 assert (
757770 len (quantizers ) == 0
758771 ), "Should not enable both vulkan and other quantizers"
759- vulkan_quantizer = get_vulkan_quantizer (llm_config .quantization .pt2e_quantize .value )
772+ vulkan_quantizer = get_vulkan_quantizer (
773+ llm_config .quantization .pt2e_quantize .value
774+ )
760775 quantizers .append (vulkan_quantizer )
761776 logging .info (f"Applying quantizers: { quantizers } " )
762777 return pt2e_quant_params , quantizers , quant_dtype
@@ -1076,9 +1091,17 @@ def _export_llama(llm_config: LlmConfig) -> LLMEdgeManager: # noqa: C901
10761091 enable_dynamic_shape = llm_config .model .enable_dynamic_shape ,
10771092 use_kv_cache = llm_config .model .use_kv_cache ,
10781093 embedding_quantize = llm_config .quantization .embedding_quantize ,
1079- pt2e_quantize = llm_config .quantization .pt2e_quantize .value if llm_config .quantization .pt2e_quantize else None ,
1094+ pt2e_quantize = (
1095+ llm_config .quantization .pt2e_quantize .value
1096+ if llm_config .quantization .pt2e_quantize
1097+ else None
1098+ ),
10801099 coreml_ios = llm_config .backend .coreml .ios ,
1081- coreml_quantize = llm_config .backend .coreml .quantize .value if llm_config .backend .coreml .quantize else None ,
1100+ coreml_quantize = (
1101+ llm_config .backend .coreml .quantize .value
1102+ if llm_config .backend .coreml .quantize
1103+ else None
1104+ ),
10821105 coreml_compute_units = llm_config .backend .coreml .compute_units .value ,
10831106 use_qnn_sha = llm_config .backend .qnn .use_sha ,
10841107 num_sharding = llm_config .backend .qnn .num_sharding ,
0 commit comments