File tree: 1 file changed, +4 -0 lines changed
intel_extension_for_pytorch/llm: 1 file changed, +4 -0 lines changed
Original file line number | Diff line number | Diff line change
@@ -1020,6 +1020,8 @@ def shard_low_precision_checkpoint(
10201020 raise AssertionError (f"{ quantization_method } is not supported yet." )
10211021 elif any (substring in key for substring in mha_layers_split_by_K ):
10221022 data = low_precision_checkpoint_dict [key ]
1023+ if ("scales" in key or "qzeros" in key ) and data .shape [0 ] == 1 :
1024+ continue
10231025 if quantization_method == "awq" :
10241026 # qweight shape: [K, N // 8]
10251027 # scales shape: [K // G, N]
@@ -1061,6 +1063,8 @@ def shard_low_precision_checkpoint(
10611063 raise AssertionError (f"{ quantization_method } is not supported yet." )
10621064 elif any (substring in key for substring in mlp_layers_split_by_K ):
10631065 data = low_precision_checkpoint_dict [key ]
1066+ if ("scales" in key or "qzeros" in key ) and data .shape [0 ] == 1 :
1067+ continue
10641068 if quantization_method == "awq" :
10651069 # qweight shape: [K, N // 8]
10661070 # scales shape: [K // G, N]
You can’t perform that action at this time.
0 commit comments