Skip to content

Commit 3fbf37d

Browse files
authored
support group_size=-1 for sharding checkpoint (#3432) (#3434)
* support group_size=-1 * improve code quality
1 parent b639eb7 commit 3fbf37d

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

intel_extension_for_pytorch/llm/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,8 @@ def shard_low_precision_checkpoint(
10201020
raise AssertionError(f"{quantization_method} is not supported yet.")
10211021
elif any(substring in key for substring in mha_layers_split_by_K):
10221022
data = low_precision_checkpoint_dict[key]
1023+
if ("scales" in key or "qzeros" in key) and data.shape[0] == 1:
1024+
continue
10231025
if quantization_method == "awq":
10241026
# qweight shape: [K, N // 8]
10251027
# scales shape: [K // G, N]
@@ -1061,6 +1063,8 @@ def shard_low_precision_checkpoint(
10611063
raise AssertionError(f"{quantization_method} is not supported yet.")
10621064
elif any(substring in key for substring in mlp_layers_split_by_K):
10631065
data = low_precision_checkpoint_dict[key]
1066+
if ("scales" in key or "qzeros" in key) and data.shape[0] == 1:
1067+
continue
10641068
if quantization_method == "awq":
10651069
# qweight shape: [K, N // 8]
10661070
# scales shape: [K // G, N]

0 commit comments

Comments
 (0)