[Inference] remove useless code and fix bug (#71488)

zhoutianzi666 · web-flow · commit 0a21a7a8e454 · 2025-03-11T19:51:53.000+08:00
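* Remove useless code (the unused `is_weight_only_encoder` parameter of `get_candidate_tiles` / `get_candidate_configs` and its call sites) and fix a bug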
diff --git a/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/cutlass_heuristic.h b/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/cutlass_heuristic.h
@@ -36,20 +36,24 @@ namespace phi {
 
 static std::vector<CutlassTileConfig> get_candidate_tiles(
     const bool is_weight_only,
-    const bool is_weight_only_encoder,
     const bool simt_configs_only,
     const int sm,
     const int group_size,
     const bool is_moe) {
   VLOG(3) << "get_candidate_tiles sm: " << sm;
-  std::vector<CutlassTileConfig> simt_configs{
-      CutlassTileConfig::CtaShape128x128x8_WarpShape64x64x8};
+  if (simt_configs_only) {
+    std::vector<CutlassTileConfig> simt_configs{
+        CutlassTileConfig::CtaShape128x128x8_WarpShape64x64x8};
+    return simt_configs;
+  } else if (!is_weight_only) {
+    std::vector<CutlassTileConfig> square_configs{
+        CutlassTileConfig::CtaShape32x128x64_WarpShape32x32x64,
+        CutlassTileConfig::CtaShape64x128x64_WarpShape32x64x64,
+        CutlassTileConfig::CtaShape128x128x64_WarpShape64x32x64,
+    };
+    return square_configs;
+  }
 
-  std::vector<CutlassTileConfig> square_configs{
-      CutlassTileConfig::CtaShape32x128x64_WarpShape32x32x64,
-      CutlassTileConfig::CtaShape64x128x64_WarpShape32x64x64,
-      CutlassTileConfig::CtaShape128x128x64_WarpShape64x32x64,
-  };
   std::vector<CutlassTileConfig> quant_B_configs_sm70{
       CutlassTileConfig::CtaShape32x128x64_WarpShape32x32x64,
       CutlassTileConfig::CtaShape64x128x64_WarpShape64x64x64,
@@ -92,27 +96,17 @@ static std::vector<CutlassTileConfig> get_candidate_tiles(
       quant_B_configs = quant_B_configs_sm70;
       break;
   }
-  const std::vector<CutlassTileConfig> allowed_quant_B_configs =
-      quant_B_configs;
-  const std::vector<CutlassTileConfig> allowed_configs =
-      is_weight_only ? allowed_quant_B_configs : square_configs;
-  return simt_configs_only ? simt_configs : allowed_configs;
+  return quant_B_configs;
 }
 
 static std::vector<CutlassGemmConfig> get_candidate_configs(
     const int sm,
     const int group_size,
     const bool is_weight_only,
-    const bool is_weight_only_encoder,
     const bool simt_configs_only,
     const bool is_moe) {
-  std::vector<CutlassTileConfig> tiles =
-      get_candidate_tiles(is_weight_only,
-                          is_weight_only_encoder,
-                          simt_configs_only,
-                          sm,
-                          group_size,
-                          is_moe);
+  std::vector<CutlassTileConfig> tiles = get_candidate_tiles(
+      is_weight_only, simt_configs_only, sm, group_size, is_moe);
 
   std::vector<CutlassGemmConfig> candidate_configs;
   const int min_stages = 2;
diff --git a/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.cu b/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/fpA_intB_gemm/fpA_intB_gemm_template.cu
@@ -596,9 +596,8 @@ void CutlassFpAIntBGemmRunner<T, WeightType>::run_gemm<EpilogueTag,
     cudaStream_t stream) {
   // VLOG(3)<<__PRETTY_FUNCTION__;
   static constexpr bool is_weight_only = !std::is_same<T, WeightType>::value;
-  const bool is_weight_only_encoder = m >= 512 ? true : false;
-  std::vector<CutlassGemmConfig> candidate_configs = get_candidate_configs(
-      sm_, group_size, is_weight_only, is_weight_only_encoder, false, false);
+  std::vector<CutlassGemmConfig> candidate_configs =
+      get_candidate_configs(sm_, group_size, is_weight_only, false, false);
 
   // Standard GEMM, so 1 "expert". We use the same function for MoE and regular
   // FFN.
diff --git a/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/moe_gemm/fused_moe_gemm_kernels_template.h b/paddle/phi/kernels/fusion/cutlass/cutlass_kernels/moe_gemm/fused_moe_gemm_kernels_template.h