
Commit 2761386

[SLP] Include vectorized calls in spill cost
Even though an intrinsic may be vectorized, the backend may end up
scalarizing it. Usually in this case IntrCost >= CallCost, so
NoCallIntrinsic will already detect the case when it's not scalarized
and exclude it from the cost.

This fixes examples like

    #include <math.h>
    void f(double *f) {
      double a = f[0], b = f[1];
      a += 1;
      b += 1;
      a = tanh(a);
      b = tanh(b);
      a += 1;
      b += 1;
      f[0] = a;
      f[1] = b;
    }

from being unprofitably vectorized to this after llvm#124984:

    vsetivli zero, 2, e64, m1, ta, ma
    vle64.v v8, (a0)
    .Lpcrel_hi0:
    auipc a0, %pcrel_hi(.LCPI0_0)
    fld fs1, %pcrel_lo(.Lpcrel_hi0)(a0)
    vfadd.vf v8, v8, fs1
    addi a0, sp, 16
    vs1r.v v8, (a0) # Unknown-size Folded Spill
    vslidedown.vi v8, v8, 1
    vfmv.f.s fa0, v8
    call tanh
    fmv.d fs0, fa0
    fld fa0, 16(sp) # 8-byte Folded Reload
    call tanh
    vsetivli zero, 2, e64, m1, ta, ma
    vfmv.v.f v8, fa0
    vfslide1down.vf v8, v8, fs0
    vfadd.vf v8, v8, fs1
    vse64.v v8, (s0)
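For reference, RISC-V assembly like the above can presumably be reproduced with a cross-compile along these lines (the target triple, -march string, and file name are assumptions, not part of the commit):

    clang --target=riscv64-linux-gnu -march=rv64gcv -O2 -S f.c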
Parent: 1a01b91

2 files changed: +25, -13 lines

llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp

Lines changed: 3 additions & 3 deletions
@@ -12248,10 +12248,10 @@ InstructionCost BoUpSLP::getSpillCost() {
       };
 
       // Debug information does not impact spill cost.
-      // Vectorized calls, represented as vector intrinsics, do not impact spill
-      // cost.
+      // Vectorized calls, represented as vector intrinsics, may still impact
+      // spill cost if scalarized in codegen.
       if (const auto *CB = dyn_cast<CallBase>(&*PrevInstIt);
-          CB && !NoCallIntrinsic(CB) && !isVectorized(CB))
+          CB && !NoCallIntrinsic(CB))
         NumCalls++;
 
       ++PrevInstIt;
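To illustrate the IntrCost >= CallCost reasoning from the commit message, here is a minimal standalone sketch of a NoCallIntrinsic-style check built on TargetTransformInfo. It is an illustration only: noCallIntrinsicSketch is a hypothetical helper, not the actual lambda in getSpillCost, and the choice of cost kind is an assumption.

    #include "llvm/ADT/SmallVector.h"
    #include "llvm/Analysis/TargetTransformInfo.h"
    #include "llvm/IR/IntrinsicInst.h"

    using namespace llvm;

    // Hypothetical sketch: returns true if CB is an intrinsic that the
    // target reports as cheaper to lower as an intrinsic than as a libcall,
    // i.e. it will not be scalarized into a real call in codegen and so
    // should not count toward the spill cost.
    static bool noCallIntrinsicSketch(const CallBase *CB,
                                      const TargetTransformInfo &TTI) {
      const auto *II = dyn_cast<IntrinsicInst>(CB);
      if (!II)
        return false; // A plain call always clobbers registers.

      SmallVector<Type *, 4> ArgTys;
      for (const Value *Arg : II->args())
        ArgTys.push_back(Arg->getType());

      IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), ArgTys);
      InstructionCost IntrCost = TTI.getIntrinsicInstrCost(
          ICA, TargetTransformInfo::TCK_RecipThroughput);
      InstructionCost CallCost =
          TTI.getCallInstrCost(/*F=*/nullptr, II->getType(), ArgTys,
                               TargetTransformInfo::TCK_RecipThroughput);

      // If IntrCost >= CallCost, assume the backend may scalarize the
      // intrinsic into real calls, so it must still count as a call here.
      return IntrCost < CallCost;
    }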

llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll

Lines changed: 22 additions & 10 deletions
@@ -145,20 +145,32 @@ entry:
 define void @exp_v2f64(ptr %a) {
 ; CHECK-LABEL: define void @exp_v2f64
 ; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
-; CHECK-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A]], align 8
-; CHECK-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], splat (double 1.000000e+00)
-; CHECK-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.exp.v2f64(<2 x double> [[TMP2]])
-; CHECK-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], splat (double 1.000000e+00)
-; CHECK-NEXT: store <2 x double> [[TMP4]], ptr [[A]], align 8
+; CHECK-NEXT: [[X:%.*]] = load double, ptr [[A]], align 8
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[A]], i64 1
+; CHECK-NEXT: [[Y:%.*]] = load double, ptr [[GEP]], align 8
+; CHECK-NEXT: [[X_ADD:%.*]] = fadd double [[X]], 1.000000e+00
+; CHECK-NEXT: [[Y_ADD:%.*]] = fadd double [[Y]], 1.000000e+00
+; CHECK-NEXT: [[X_EXP:%.*]] = call double @llvm.exp.f64(double [[X_ADD]])
+; CHECK-NEXT: [[Y_EXP:%.*]] = call double @llvm.exp.f64(double [[Y_ADD]])
+; CHECK-NEXT: [[X_ADD2:%.*]] = fadd double [[X_EXP]], 1.000000e+00
+; CHECK-NEXT: [[Y_ADD2:%.*]] = fadd double [[Y_EXP]], 1.000000e+00
+; CHECK-NEXT: store double [[X_ADD2]], ptr [[A]], align 8
+; CHECK-NEXT: store double [[Y_ADD2]], ptr [[GEP]], align 8
 ; CHECK-NEXT: ret void
 ;
 ; DEFAULT-LABEL: define void @exp_v2f64
 ; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
-; DEFAULT-NEXT: [[TMP1:%.*]] = load <2 x double>, ptr [[A]], align 8
-; DEFAULT-NEXT: [[TMP2:%.*]] = fadd <2 x double> [[TMP1]], splat (double 1.000000e+00)
-; DEFAULT-NEXT: [[TMP3:%.*]] = call <2 x double> @llvm.exp.v2f64(<2 x double> [[TMP2]])
-; DEFAULT-NEXT: [[TMP4:%.*]] = fadd <2 x double> [[TMP3]], splat (double 1.000000e+00)
-; DEFAULT-NEXT: store <2 x double> [[TMP4]], ptr [[A]], align 8
+; DEFAULT-NEXT: [[X:%.*]] = load double, ptr [[A]], align 8
+; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[A]], i64 1
+; DEFAULT-NEXT: [[Y:%.*]] = load double, ptr [[GEP]], align 8
+; DEFAULT-NEXT: [[X_ADD:%.*]] = fadd double [[X]], 1.000000e+00
+; DEFAULT-NEXT: [[Y_ADD:%.*]] = fadd double [[Y]], 1.000000e+00
+; DEFAULT-NEXT: [[X_EXP:%.*]] = call double @llvm.exp.f64(double [[X_ADD]])
+; DEFAULT-NEXT: [[Y_EXP:%.*]] = call double @llvm.exp.f64(double [[Y_ADD]])
+; DEFAULT-NEXT: [[X_ADD2:%.*]] = fadd double [[X_EXP]], 1.000000e+00
+; DEFAULT-NEXT: [[Y_ADD2:%.*]] = fadd double [[Y_EXP]], 1.000000e+00
+; DEFAULT-NEXT: store double [[X_ADD2]], ptr [[A]], align 8
+; DEFAULT-NEXT: store double [[Y_ADD2]], ptr [[GEP]], align 8
 ; DEFAULT-NEXT: ret void
 ;
 %x = load double, ptr %a
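For context, a scalar input matching these CHECK lines would presumably look like the IR below. This is a reconstruction from the FileCheck capture names above, with RUN lines and attributes omitted; the spill cost of the two @llvm.exp.f64 calls across the call sites is what now keeps SLP from vectorizing it.

    declare double @llvm.exp.f64(double)

    define void @exp_v2f64(ptr %a) {
      %x = load double, ptr %a
      %gep = getelementptr double, ptr %a, i64 1
      %y = load double, ptr %gep
      %x.add = fadd double %x, 1.0
      %y.add = fadd double %y, 1.0
      %x.exp = call double @llvm.exp.f64(double %x.add)
      %y.exp = call double @llvm.exp.f64(double %y.add)
      %x.add2 = fadd double %x.exp, 1.0
      %y.add2 = fadd double %y.exp, 1.0
      store double %x.add2, ptr %a
      store double %y.add2, ptr %gep
      ret void
    }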
