workaround for prebuild error (ROCm#1588)

yzhou103 · web-flow · commit 8910746b98e7 · 2025-12-09T14:15:43.000+08:00
* workaround for prebuild error

* update

* fix lint
diff --git a/csrc/ck_deepgemm/gen_instances.py b/csrc/ck_deepgemm/gen_instances.py
@@ -19,7 +19,7 @@ def __init__(self, working_path, istune=False):
         # self.b_dtype = b_dtype.upper()
         # self.c_dtype = c_dtype.upper()
         # self.quant_type = quant_type
-        assert (istune == False, "not surpport tuning!")
+        assert istune == False, "not surpport tuning!"
 
     def gen_instance(self, k: kernelInstance):
         INSTANCE_IMPL = f"""// SPDX-License-Identifier: MIT
diff --git a/csrc/ck_gemm_a8w8_bpreshuffle/gen_instances.py b/csrc/ck_gemm_a8w8_bpreshuffle/gen_instances.py
@@ -236,14 +236,16 @@ def get_tune_dict(tune_dict_csv):
             gpu = torch.cuda.current_device()
             device_properties = torch.cuda.get_device_properties(gpu)
             cu_num = device_properties.multi_processor_count
-            tune_df = tune_df[
-                (tune_df["cu_num"] == cu_num) & (tune_df["libtype"] == "ck")
-            ].reset_index()
+            tune_df = tune_df[(tune_df["cu_num"] == cu_num)].reset_index()
+        tune_df = tune_df[tune_df["libtype"] == "ck"].reset_index()
         for i in range(len(tune_df)):
             M = tune_df.loc[i, "M"]
             N = tune_df.loc[i, "N"]
             K = tune_df.loc[i, "K"]
             kid = tune_df.loc[i, "kernelId"]
+            if kid < 0 or kid >= len(kernels_list):
+                print(f"[Warning]: kernelId {kid} is out of range, skip it")
+                continue
             tune_dict[(M, N, K)] = kernels_list[kid]
     return tune_dict
 
diff --git a/csrc/cktile_gemm_a8w8_bpreshuffle/gen_instances.py b/csrc/cktile_gemm_a8w8_bpreshuffle/gen_instances.py
@@ -233,14 +233,17 @@ def get_tune_dict(tune_dict_csv):
             gpu = torch.cuda.current_device()
             device_properties = torch.cuda.get_device_properties(gpu)
             cu_num = device_properties.multi_processor_count
-            tune_df = tune_df[
-                (tune_df["cu_num"] == cu_num) & (tune_df["libtype"] == "cktile")
-            ].reset_index()
+            tune_df = tune_df[(tune_df["cu_num"] == cu_num)].reset_index()
+        tune_df = tune_df[tune_df["libtype"] == "cktile"].reset_index()
         for i in range(len(tune_df)):
             M = tune_df.loc[i, "M"]
             N = tune_df.loc[i, "N"]
             K = tune_df.loc[i, "K"]
             kid = tune_df.loc[i, "kernelId"]
+            # Valid ids are 0..len(kernels_list)-1; kid == len(kernels_list) would raise IndexError below.
+            if kid < 0 or kid >= len(kernels_list):
+                print(f"[Warning]: kernelId {kid} is out of range, skip it")
+                continue
             tune_dict[(M, N, K)] = kernels_list[kid]
     return tune_dict