[SLP] Include vectorized calls in spill cost #125650
Conversation
Even though an intrinsic may be vectorized, the backend may still end up scalarizing it.
Usually in that case IntrCost >= CallCost, so NoCallIntrinsic will already detect when the intrinsic is not scalarized and exclude it from the spill cost.
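For reference, the NoCallIntrinsic check mentioned above compares the intrinsic's own cost against the cost of a library call. The following is a minimal sketch of that comparison, assuming the usual TargetTransformInfo hooks; it illustrates the idea and is not the exact lambda in BoUpSLP::getSpillCost().
// Sketch of a NoCallIntrinsic-style check: returns true when an intrinsic
// is expected to stay a (possibly vector) instruction rather than being
// scalarized into a library call, so it should not count towards spill cost.
// Assumes TTI is a const TargetTransformInfo *.
auto NoCallIntrinsic = [&](const CallBase *CB) {
  const auto *II = dyn_cast<IntrinsicInst>(CB);
  if (!II)
    return false; // Plain calls always clobber registers across the call.

  SmallVector<Type *, 4> ArgTys;
  for (const Value *Arg : II->args())
    ArgTys.push_back(Arg->getType());

  // Cost of keeping the intrinsic as an instruction.
  IntrinsicCostAttributes ICA(II->getIntrinsicID(), II->getType(), ArgTys);
  InstructionCost IntrCost =
      TTI->getIntrinsicInstrCost(ICA, TTI::TCK_RecipThroughput);

  // Cost if the backend falls back to a scalarized library call.
  InstructionCost CallCost = TTI->getCallInstrCost(
      /*F=*/nullptr, II->getType(), ArgTys, TTI::TCK_RecipThroughput);

  // Cheaper than a call => assume no scalarization, so no extra spills.
  return IntrCost < CallCost;
};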
This fixes examples like
#include <math.h>
void f(double *f) {
  double a = f[0], b = f[1];
  a += 1;
  b += 1;
  a = tanh(a);
  b = tanh(b);
  a += 1;
  b += 1;
  f[0] = a;
  f[1] = b;
}
from being unprofitably vectorized to this after llvm#124984:
vsetivli zero, 2, e64, m1, ta, ma
vle64.v v8, (a0)
.Lpcrel_hi0:
auipc a0, %pcrel_hi(.LCPI0_0)
fld fs1, %pcrel_lo(.Lpcrel_hi0)(a0)
vfadd.vf v8, v8, fs1
addi a0, sp, 16
vs1r.v v8, (a0) # Unknown-size Folded Spill
vslidedown.vi v8, v8, 1
vfmv.f.s fa0, v8
call tanh
fmv.d fs0, fa0
fld fa0, 16(sp) # 8-byte Folded Reload
call tanh
vsetivli zero, 2, e64, m1, ta, ma
vfmv.v.f v8, fa0
vfslide1down.vf v8, v8, fs0
vfadd.vf v8, v8, fs1
vse64.v v8, (s0)
@llvm/pr-subscribers-llvm-transforms @llvm/pr-subscribers-vectorizers
Author: Luke Lau (lukel97)
Full diff: https://github.com/llvm/llvm-project/pull/125650.diff
2 Files Affected:
diff --git a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
index 539c9227af7e3a..ef8740ec2b451f 100644
--- a/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
+++ b/llvm/lib/Transforms/Vectorize/SLPVectorizer.cpp
@@ -12248,10 +12248,10 @@ InstructionCost BoUpSLP::getSpillCost() {
};
// Debug information does not impact spill cost.
- // Vectorized calls, represented as vector intrinsics, do not impact spill
- // cost.
+ // Vectorized calls, represented as vector intrinsics, may still impact
+ // spill cost if scalarized in codegen.
if (const auto *CB = dyn_cast<CallBase>(&*PrevInstIt);
- CB && !NoCallIntrinsic(CB) && !isVectorized(CB))
+ CB && !NoCallIntrinsic(CB))
NumCalls++;
++PrevInstIt;
diff --git a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
index 5bfd776512711f..4a2a6ace5f8b88 100644
--- a/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
+++ b/llvm/test/Transforms/SLPVectorizer/RISCV/math-function.ll
@@ -141,6 +141,52 @@ entry:
ret <4 x float> %vecins.3
}
+; Don't vectorize @llvm.exp because it will be scalarized in codegen.
+define void @exp_v2f64(ptr %a) {
+; CHECK-LABEL: define void @exp_v2f64
+; CHECK-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; CHECK-NEXT: [[X:%.*]] = load double, ptr [[A]], align 8
+; CHECK-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[A]], i64 1
+; CHECK-NEXT: [[Y:%.*]] = load double, ptr [[GEP]], align 8
+; CHECK-NEXT: [[X_ADD:%.*]] = fadd double [[X]], 1.000000e+00
+; CHECK-NEXT: [[Y_ADD:%.*]] = fadd double [[Y]], 1.000000e+00
+; CHECK-NEXT: [[X_EXP:%.*]] = call double @llvm.exp.f64(double [[X_ADD]])
+; CHECK-NEXT: [[Y_EXP:%.*]] = call double @llvm.exp.f64(double [[Y_ADD]])
+; CHECK-NEXT: [[X_ADD2:%.*]] = fadd double [[X_EXP]], 1.000000e+00
+; CHECK-NEXT: [[Y_ADD2:%.*]] = fadd double [[Y_EXP]], 1.000000e+00
+; CHECK-NEXT: store double [[X_ADD2]], ptr [[A]], align 8
+; CHECK-NEXT: store double [[Y_ADD2]], ptr [[GEP]], align 8
+; CHECK-NEXT: ret void
+;
+; DEFAULT-LABEL: define void @exp_v2f64
+; DEFAULT-SAME: (ptr [[A:%.*]]) #[[ATTR1]] {
+; DEFAULT-NEXT: [[X:%.*]] = load double, ptr [[A]], align 8
+; DEFAULT-NEXT: [[GEP:%.*]] = getelementptr double, ptr [[A]], i64 1
+; DEFAULT-NEXT: [[Y:%.*]] = load double, ptr [[GEP]], align 8
+; DEFAULT-NEXT: [[X_ADD:%.*]] = fadd double [[X]], 1.000000e+00
+; DEFAULT-NEXT: [[Y_ADD:%.*]] = fadd double [[Y]], 1.000000e+00
+; DEFAULT-NEXT: [[X_EXP:%.*]] = call double @llvm.exp.f64(double [[X_ADD]])
+; DEFAULT-NEXT: [[Y_EXP:%.*]] = call double @llvm.exp.f64(double [[Y_ADD]])
+; DEFAULT-NEXT: [[X_ADD2:%.*]] = fadd double [[X_EXP]], 1.000000e+00
+; DEFAULT-NEXT: [[Y_ADD2:%.*]] = fadd double [[Y_EXP]], 1.000000e+00
+; DEFAULT-NEXT: store double [[X_ADD2]], ptr [[A]], align 8
+; DEFAULT-NEXT: store double [[Y_ADD2]], ptr [[GEP]], align 8
+; DEFAULT-NEXT: ret void
+;
+ %x = load double, ptr %a
+ %gep = getelementptr double, ptr %a, i64 1
+ %y = load double, ptr %gep
+ %x.add = fadd double %x, 1.0
+ %y.add = fadd double %y, 1.0
+ %x.exp = call double @llvm.exp(double %x.add)
+ %y.exp = call double @llvm.exp(double %y.add)
+ %x.add2 = fadd double %x.exp, 1.0
+ %y.add2 = fadd double %y.exp, 1.0
+ store double %x.add2, ptr %a
+ store double %y.add2, ptr %gep
+ ret void
+}
+
declare float @expf(float) readonly nounwind willreturn
; We can not vectorized exp since RISCV has no such instruction.
Closing as I didn't see #125070, sorry for the noise.