
Commit 2761386

[SLP] Include vectorized calls in spill cost
Even though an intrinsic may be vectorized, the backend may end up
scalarizing it. Usually in this case IntrCost >= CallCost, so
NoCallIntrinsic will already detect the case when it's not scalarized
and exclude it from the cost.

This fixes examples like

    #include <math.h>
    void f(double *f) {
      double a = f[0], b = f[1];
      a += 1;
      b += 1;
      a = tanh(a);
      b = tanh(b);
      a += 1;
      b += 1;
      f[0] = a;
      f[1] = b;
    }

from being unprofitably vectorized to this after llvm#124984:

    vsetivli zero, 2, e64, m1, ta, ma
    vle64.v v8, (a0)
    .Lpcrel_hi0:
    auipc a0, %pcrel_hi(.LCPI0_0)
    fld fs1, %pcrel_lo(.Lpcrel_hi0)(a0)
    vfadd.vf v8, v8, fs1
    addi a0, sp, 16
    vs1r.v v8, (a0) # Unknown-size Folded Spill
    vslidedown.vi v8, v8, 1
    vfmv.f.s fa0, v8
    call tanh
    fmv.d fs0, fa0
    fld fa0, 16(sp) # 8-byte Folded Reload
    call tanh
    vsetivli zero, 2, e64, m1, ta, ma
    vfmv.v.f v8, fa0
    vfslide1down.vf v8, v8, fs0
    vfadd.vf v8, v8, fs1
    vse64.v v8, (s0)
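For reference, RISC-V assembly like the above can presumably be reproduced with a cross-compile along these lines (the target triple, -march string, and file name are assumptions, not part of the commit):

    clang --target=riscv64-linux-gnu -march=rv64gcv -O2 -S f.c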
Parent: 1a01b91

2 files changed: +25, -13 lines

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 3 deletions
@@ -12248,10 +12248,10 @@ InstructionCost BoUpSLP::getSpillCost() {
       };
 
       // Debug information does not impact spill cost.
-      // Vectorized calls, represented as vector intrinsics, do not impact spill
-      // cost.
+      // Vectorized calls, represented as vector intrinsics, may still impact
+      // spill cost if scalarized in codegen.
       if (const auto *CB = dyn_cast<CallBase>(&*PrevInstIt);
-          CB && !NoCallIntrinsic(CB) && !isVectorized(CB))
+          CB && !NoCallIntrinsic(CB))
         NumCalls++;
 
       ++PrevInstIt;
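To illustrate the IntrCost >= CallCost reasoning from the commit message, here is a minimal standalone sketch of a NoCallIntrinsic-style check built on TargetTransformInfo. It is an illustration only: noCallIntrinsicSketch is a hypothetical helper, not the actual lambda in getSpillCost, and the choice of cost kind is an assumption.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/IntrinsicInst.h"

    using namespace llvm;

    // Hypothetical sketch: returns true if CB is an intrinsic that the
    // target reports as cheaper to lower as an intrinsic than as a libcall,
    // i.e. it will not be scalarized into a real call in codegen and so
    // should not count toward the spill cost.
    static bool noCallIntrinsicSketch(const CallBase *CB,
                                      const TargetTransformInfo &TTI) {
      const auto *II = dyn_cast<IntrinsicInst>(CB);
      if (!II)
        return false; // A plain call always clobbers registers.

      SmallVector<Type *, 4> ArgTys;
      for (const Value *Arg : II->args())
        ArgTys.push_back(Arg->getType());

      IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), ArgTys);
      InstructionCost IntrCost = TTI.getIntrinsicInstrCost(
          ICA, TargetTransformInfo::TCK_RecipThroughput);
      InstructionCost CallCost =
          TTI.getCallInstrCost(/*F=*/nullptr, II->getType(), ArgTys,
                               TargetTransformInfo::TCK_RecipThroughput);

      // If IntrCost >= CallCost, assume the backend may scalarize the
      // intrinsic into real calls, so it must still count as a call here.
      return IntrCost < CallCost;
    }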

llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll

Lines changed: 22 additions & 10 deletions
@@ -145,20 +145,32 @@ entry:
 define void @exp_v2f64(ptr %a) {
 ; CHECK-LABEL: define void @exp_v2f64
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], splat (double 1.000000e+00)
-; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.exp.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], splat (double 1.000000e+00)
-; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[A]], align 8
+; CHECK-NEXT: [[X:%.*]] = load double, ptr [[A]], align 8
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[A]], i64 1
+; CHECK-NEXT: [[Y:%.*]] = load double, ptr [[GEP]], align 8
+; CHECK-NEXT: [[X_ADD:%.*]] = fadd double [[X]], 1.000000e+00
+; CHECK-NEXT: [[Y_ADD:%.*]] = fadd double [[Y]], 1.000000e+00
+; CHECK-NEXT: [[X_EXP:%.*]] = call double @llvm.exp.f64(double [[X_ADD]])
+; CHECK-NEXT: [[Y_EXP:%.*]] = call double @llvm.exp.f64(double [[Y_ADD]])
+; CHECK-NEXT: [[X_ADD2:%.*]] = fadd double [[X_EXP]], 1.000000e+00
+; CHECK-NEXT: [[Y_ADD2:%.*]] = fadd double [[Y_EXP]], 1.000000e+00
+; CHECK-NEXT: store double [[X_ADD2]], ptr [[A]], align 8
+; CHECK-NEXT: store double [[Y_ADD2]], ptr [[GEP]], align 8
 ; CHECK-NEXT: ret void
 ;
 ; DEFAULT-LABEL: define void @exp_v2f64
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
-; DEFAULT-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A]], align 8
-; DEFAULT-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], splat (double 1.000000e+00)
-; DEFAULT-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.exp.v2f64(<2 x double> [[TMP2]])
-; DEFAULT-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], splat (double 1.000000e+00)
-; DEFAULT-NEXT: store <2 x double> [[TMP4]], ptr [[A]], align 8
+; DEFAULT-NEXT: [[X:%.*]] = load double, ptr [[A]], align 8
+; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[A]], i64 1
+; DEFAULT-NEXT: [[Y:%.*]] = load double, ptr [[GEP]], align 8
+; DEFAULT-NEXT: [[X_ADD:%.*]] = fadd double [[X]], 1.000000e+00
+; DEFAULT-NEXT: [[Y_ADD:%.*]] = fadd double [[Y]], 1.000000e+00
+; DEFAULT-NEXT: [[X_EXP:%.*]] = call double @llvm.exp.f64(double [[X_ADD]])
+; DEFAULT-NEXT: [[Y_EXP:%.*]] = call double @llvm.exp.f64(double [[Y_ADD]])
+; DEFAULT-NEXT: [[X_ADD2:%.*]] = fadd double [[X_EXP]], 1.000000e+00
+; DEFAULT-NEXT: [[Y_ADD2:%.*]] = fadd double [[Y_EXP]], 1.000000e+00
+; DEFAULT-NEXT: store double [[X_ADD2]], ptr [[A]], align 8
+; DEFAULT-NEXT: store double [[Y_ADD2]], ptr [[GEP]], align 8
 ; DEFAULT-NEXT: ret void
 ;
 %x = load double, ptr %a
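For context, a scalar input matching these CHECK lines would presumably look like the IR below. This is a reconstruction from the FileCheck capture names above, with RUN lines and attributes omitted; the spill cost of the two @llvm.exp.f64 calls across the call sites is what now keeps SLP from vectorizing it.

    declare double @llvm.exp.f64(double)

    define void @exp_v2f64(ptr %a) {
      %x = load double, ptr %a
      %gep = getelementptr double, ptr %a, i64 1
      %y = load double, ptr %gep
      %x.add = fadd double %x, 1.0
      %y.add = fadd double %y, 1.0
      %x.exp = call double @llvm.exp.f64(double %x.add)
      %y.exp = call double @llvm.exp.f64(double %y.add)
      %x.add2 = fadd double %x.exp, 1.0
      %y.add2 = fadd double %y.exp, 1.0
      store double %x.add2, ptr %a
      store double %y.add2, ptr %gep
      ret void
    }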
