Skip to content

Commit 3fbf37d

Browse files
authored
support group_size=-1 for sharding checkpoint (#3432) (#3434)
* support group_size=-1 * improve code quality
1 parent b639eb7 commit 3fbf37d

File tree

1 file changed

+4
-0
lines changed

1 file changed

+4
-0
lines changed

intel_extension_for_pytorch/llm/utils.py

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1020,6 +1020,8 @@ def shard_low_precision_checkpoint(
10201020
raise AssertionError(f"{quantization_method} is not supported yet.")
10211021
elif any(substring in key for substring in mha_layers_split_by_K):
10221022
data = low_precision_checkpoint_dict[key]
1023+
if ("scales" in key or "qzeros" in key) and data.shape[0] == 1:
1024+
continue
10231025
if quantization_method == "awq":
10241026
# qweight shape: [K, N // 8]
10251027
# scales shape: [K // G, N]
@@ -1061,6 +1063,8 @@ def shard_low_precision_checkpoint(
10611063
raise AssertionError(f"{quantization_method} is not supported yet.")
10621064
elif any(substring in key for substring in mlp_layers_split_by_K):
10631065
data = low_precision_checkpoint_dict[key]
1066+
if ("scales" in key or "qzeros" in key) and data.shape[0] == 1:
1067+
continue
10641068
if quantization_method == "awq":
10651069
# qweight shape: [K, N // 8]
10661070
# scales shape: [K // G, N]

0 commit comments

Comments
 (0)