-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[LV] Pre-commit test for #151664 #151671
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
[LV] Pre-commit test for #151664 #151671
Conversation
Hoisted vector instructions are costed incorrectly.
|
@llvm/pr-subscribers-llvm-transforms Author: Ramkumar Ramachandra (artagnon) ChangesHoisted vector instructions are costed incorrectly. Full diff: https://github.com/llvm/llvm-project/pull/151671.diff 3 Files Affected:
diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
index b824f5309adc1..3c052b1eeea82 100644
--- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
@@ -377,6 +377,66 @@ define void @maximum() {
ret void
}
+define void @minimumnum() {
+; CHECK-LABEL: 'minimumnum'
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %1 = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %2 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 10 for: %3 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 20 for: %4 = call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %5 = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> poison, <2 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of 10 for: %6 = call <4 x double> @llvm.minimumnum.v4f64(<4 x double> poison, <4 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of 20 for: %7 = call <8 x double> @llvm.minimumnum.v8f64(<8 x double> poison, <8 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of 40 for: %8 = call <16 x double> @llvm.minimumnum.v16f64(<16 x double> poison, <16 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison)
+ call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison)
+ call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison)
+ call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison)
+ call <2 x double> @llvm.minimumnum.v2f64(<2 x double> poison, <2 x double> poison)
+ call <4 x double> @llvm.minimumnum.v4f64(<4 x double> poison, <4 x double> poison)
+ call <8 x double> @llvm.minimumnum.v8f64(<8 x double> poison, <8 x double> poison)
+ call <16 x double> @llvm.minimumnum.v16f64(<16 x double> poison, <16 x double> poison)
+ ret void
+}
+
+define void @minimumnum_fp16() {
+; CHECK-NOF16-LABEL: 'minimumnum_fp16'
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %1 = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %2 = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %3 = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %4 = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-F16-LABEL: 'minimumnum_fp16'
+; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %1 = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %2 = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %3 = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of 10 for: %4 = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison)
+ call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison)
+ call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison)
+ call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison)
+ ret void
+}
+
+define void @minimumnum_bf16() {
+; CHECK-LABEL: 'minimumnum_bf16'
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %1 = call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %2 = call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %3 = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %4 = call <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call <2 x bfloat> @llvm.minimumnum.v2fb16(<2 x bfloat> poison, <2 x bfloat> poison)
+ call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
+ call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
+ call <16 x bfloat> @llvm.minimumnum.v6bf16(<16 x bfloat> poison, <16 x bfloat> poison)
+ ret void
+}
+
declare i8 @llvm.umin.i8(i8, i8)
declare i16 @llvm.umin.i16(i16, i16)
declare i32 @llvm.umin.i32(i32, i32)
@@ -512,3 +572,20 @@ declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
declare <8 x float> @llvm.maximum.v8f32(<8 x float>, <8 x float>)
declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>)
+
+declare <2 x half> @llvm.minimumnum.v2f16(<2 x half>, <2 x half>)
+declare <4 x half> @llvm.minimumnum.v4f16(<4 x half>, <4 x half>)
+declare <8 x half> @llvm.minimumnum.v8f16(<8 x half>, <8 x half>)
+declare <16 x half> @llvm.minimumnum.v16f16(<16 x half>, <16 x half>)
+declare <2 x bfloat> @llvm.minimumnum.2bf16(<2 x bfloat>, <2 x bfloat>)
+declare <4 x bfloat> @llvm.minimumnum.4bf16(<4 x bfloat>, <4 x bfloat>)
+declare <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat>, <8 x bfloat>)
+declare <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat>, <16 x bfloat>)
+declare <2 x float> @llvm.minimumnum.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.minimumnum.v4f32(<4 x float>, <4 x float>)
+declare <8 x float> @llvm.minimumnum.v8f32(<8 x float>, <8 x float>)
+declare <16 x float> @llvm.minimumnum.v16f32(<16 x float>, <16 x float>)
+declare <2 x double> @llvm.minimumnum.v2f64(<2 x double>, <2 x double>)
+declare <4 x double> @llvm.minimumnum.v4f64(<4 x double>, <4 x double>)
+declare <8 x double> @llvm.minimumnum.nxv8f64(<8 x double>, <8 x double>)
+declare <16 x double> @llvm.minimumnum.nxv16f64(<16 x double>, <16 x double>)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll
index 829ce127493ed..73574dd61d75c 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll
@@ -255,6 +255,60 @@ define void @maximum() {
ret void
}
+define void @minimumnum() {
+; CHECK-LABEL: 'minimumnum'
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call <vscale x 2 x float> @llvm.minimumnum.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call <vscale x 8 x float> @llvm.minimumnum.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call <vscale x 16 x float> @llvm.minimumnum.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %5 = call <vscale x 2 x double> @llvm.minimumnum.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %6 = call <vscale x 4 x double> @llvm.minimumnum.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %7 = call <vscale x 8 x double> @llvm.minimumnum.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %8 = call <vscale x 16 x double> @llvm.minimumnum.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call <vscale x 2 x float> @llvm.minimumnum(<vscale x 2 x float> poison, <vscale x 2 x float> poison)
+ call <vscale x 4 x float> @llvm.minimumnum(<vscale x 4 x float> poison, <vscale x 4 x float> poison)
+ call <vscale x 8 x float> @llvm.minimumnum(<vscale x 8 x float> poison, <vscale x 8 x float> poison)
+ call <vscale x 16 x float> @llvm.minimumnum(<vscale x 16 x float> poison, <vscale x 16 x float> poison)
+ call <vscale x 2 x double> @llvm.minimumnum(<vscale x 2 x double> poison, <vscale x 2 x double> poison)
+ call <vscale x 4 x double> @llvm.minimumnum(<vscale x 4 x double> poison, <vscale x 4 x double> poison)
+ call <vscale x 8 x double> @llvm.minimumnum(<vscale x 8 x double> poison, <vscale x 8 x double> poison)
+ call <vscale x 16 x double> @llvm.minimumnum(<vscale x 16 x double> poison, <vscale x 16 x double> poison)
+ ret void
+}
+
+define void @minimumnum_fp16() {
+; CHECK-LABEL: 'minimumnum_fp16'
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call <vscale x 2 x half> @llvm.minimumnum.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call <vscale x 4 x half> @llvm.minimumnum.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call <vscale x 8 x half> @llvm.minimumnum.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call <vscale x 16 x half> @llvm.minimumnum.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call <vscale x 2 x half> @llvm.minimumnum(<vscale x 2 x half> poison, <vscale x 2 x half> poison)
+ call <vscale x 4 x half> @llvm.minimumnum(<vscale x 4 x half> poison, <vscale x 4 x half> poison)
+ call <vscale x 8 x half> @llvm.minimumnum(<vscale x 8 x half> poison, <vscale x 8 x half> poison)
+ call <vscale x 16 x half> @llvm.minimumnum(<vscale x 16 x half> poison, <vscale x 16 x half> poison)
+ ret void
+}
+
+define void @minimumnum_bf16() {
+; CHECK-LABEL: 'minimumnum_bf16'
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call <vscale x 2 x bfloat> @llvm.minimumnum.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call <vscale x 4 x bfloat> @llvm.minimumnum.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call <vscale x 8 x bfloat> @llvm.minimumnum.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call <vscale x 16 x bfloat> @llvm.minimumnum.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call <vscale x 2 x bfloat> @llvm.minimumnum(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+ call <vscale x 4 x bfloat> @llvm.minimumnum(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+ call <vscale x 8 x bfloat> @llvm.minimumnum(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+ call <vscale x 16 x bfloat> @llvm.minimumnum(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+ ret void
+}
+
+
declare <vscale x 4 x i8> @llvm.umin.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
declare <vscale x 8 x i8> @llvm.umin.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
declare <vscale x 16 x i8> @llvm.umin.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
@@ -354,3 +408,20 @@ declare <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float>, <vscale
declare <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>)
declare <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>)
+
+declare <vscale x 2 x half> @llvm.minimumnum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
+declare <vscale x 4 x half> @llvm.minimumnum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
+declare <vscale x 8 x half> @llvm.minimumnum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 16 x half> @llvm.minimumnum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>)
+declare <vscale x 2 x bfloat> @llvm.minimumnum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>)
+declare <vscale x 4 x bfloat> @llvm.minimumnum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>)
+declare <vscale x 8 x bfloat> @llvm.minimumnum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+declare <vscale x 16 x bfloat> @llvm.minimumnum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>)
+declare <vscale x 2 x float> @llvm.minimumnum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
+declare <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 8 x float> @llvm.minimumnum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>)
+declare <vscale x 16 x float> @llvm.minimumnum.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>)
+declare <vscale x 2 x double> @llvm.minimumnum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 4 x double> @llvm.minimumnum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>)
+declare <vscale x 8 x double> @llvm.minimumnum.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>)
+declare <vscale x 16 x double> @llvm.minimumnum.nxv16f64(<vscale x 16 x double>, <vscale x 16 x double>)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll
new file mode 100644
index 0000000000000..2823712c9143a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5
+; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S \
+; RUN: -debug-only=loop-vectorize %s 2>&1 | FileCheck %s
+
+; FIXME: Hoisted vector code should be costed with scalable cost.
+; In this example, `<vscale x 4 x float> @llvm.minimumnum` has an invalid cost,
+; and hence should not be produced by LoopVectorize. Unfortunately, what is
+; actually costed is `<4 x float> @llvm.minimumnum`.
+
+; CHECK: LV: Found an estimated cost of 3 for VF 1 For instruction: %res = tail call float @llvm.minimumnum.f32(float 0.000000e+00, float 0.000000e+00)
+define void @cost_hoisted_vector_code(ptr %p) {
+; CHECK-LABEL: define void @cost_hoisted_vector_code(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 -1, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 -1, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 1, [[N_VEC]]
+; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP10]]
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP7]], ptr [[TMP8]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP7]], ptr [[TMP11]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 -1, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ]
+ %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
+ %res = tail call float @llvm.minimumnum.f32(float 0.0, float 0.0)
+ %gep.p.red = getelementptr float, ptr %p, i64 %idx
+ store float %res, ptr %gep.p.red, align 4
+ %idx.next = add i64 %idx, 1
+ %iv.next = add i64 %iv, 1
+ %exit.cond = icmp eq i64 %iv.next, 0
+ br i1 %exit.cond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret void
+}
+
+declare float @llvm.minimumnum.f32(float, float)
|
|
@llvm/pr-subscribers-llvm-analysis Author: Ramkumar Ramachandra (artagnon) ChangesHoisted vector instructions are costed incorrectly. Full diff: https://github.com/llvm/llvm-project/pull/151671.diff 3 Files Affected:
diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
index b824f5309adc1..3c052b1eeea82 100644
--- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll
@@ -377,6 +377,66 @@ define void @maximum() {
ret void
}
+define void @minimumnum() {
+; CHECK-LABEL: 'minimumnum'
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %1 = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %2 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 10 for: %3 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 20 for: %4 = call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of 3 for: %5 = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> poison, <2 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of 10 for: %6 = call <4 x double> @llvm.minimumnum.v4f64(<4 x double> poison, <4 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of 20 for: %7 = call <8 x double> @llvm.minimumnum.v8f64(<8 x double> poison, <8 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of 40 for: %8 = call <16 x double> @llvm.minimumnum.v16f64(<16 x double> poison, <16 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison)
+ call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison)
+ call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison)
+ call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison)
+ call <2 x double> @llvm.minimumnum.v2f64(<2 x double> poison, <2 x double> poison)
+ call <4 x double> @llvm.minimumnum.v4f64(<4 x double> poison, <4 x double> poison)
+ call <8 x double> @llvm.minimumnum.v8f64(<8 x double> poison, <8 x double> poison)
+ call <16 x double> @llvm.minimumnum.v16f64(<16 x double> poison, <16 x double> poison)
+ ret void
+}
+
+define void @minimumnum_fp16() {
+; CHECK-NOF16-LABEL: 'minimumnum_fp16'
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %1 = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %2 = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %3 = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %4 = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison)
+; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+; CHECK-F16-LABEL: 'minimumnum_fp16'
+; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %1 = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %2 = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %3 = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of 10 for: %4 = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison)
+; CHECK-F16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison)
+ call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison)
+ call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison)
+ call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison)
+ ret void
+}
+
+define void @minimumnum_bf16() {
+; CHECK-LABEL: 'minimumnum_bf16'
+; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %1 = call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %2 = call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %3 = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %4 = call <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call <2 x bfloat> @llvm.minimumnum.v2fb16(<2 x bfloat> poison, <2 x bfloat> poison)
+ call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison)
+ call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)
+ call <16 x bfloat> @llvm.minimumnum.v6bf16(<16 x bfloat> poison, <16 x bfloat> poison)
+ ret void
+}
+
declare i8 @llvm.umin.i8(i8, i8)
declare i16 @llvm.umin.i16(i16, i16)
declare i32 @llvm.umin.i32(i32, i32)
@@ -512,3 +572,20 @@ declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>)
declare <8 x float> @llvm.maximum.v8f32(<8 x float>, <8 x float>)
declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>)
declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>)
+
+declare <2 x half> @llvm.minimumnum.v2f16(<2 x half>, <2 x half>)
+declare <4 x half> @llvm.minimumnum.v4f16(<4 x half>, <4 x half>)
+declare <8 x half> @llvm.minimumnum.v8f16(<8 x half>, <8 x half>)
+declare <16 x half> @llvm.minimumnum.v16f16(<16 x half>, <16 x half>)
+declare <2 x bfloat> @llvm.minimumnum.2bf16(<2 x bfloat>, <2 x bfloat>)
+declare <4 x bfloat> @llvm.minimumnum.4bf16(<4 x bfloat>, <4 x bfloat>)
+declare <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat>, <8 x bfloat>)
+declare <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat>, <16 x bfloat>)
+declare <2 x float> @llvm.minimumnum.v2f32(<2 x float>, <2 x float>)
+declare <4 x float> @llvm.minimumnum.v4f32(<4 x float>, <4 x float>)
+declare <8 x float> @llvm.minimumnum.v8f32(<8 x float>, <8 x float>)
+declare <16 x float> @llvm.minimumnum.v16f32(<16 x float>, <16 x float>)
+declare <2 x double> @llvm.minimumnum.v2f64(<2 x double>, <2 x double>)
+declare <4 x double> @llvm.minimumnum.v4f64(<4 x double>, <4 x double>)
+declare <8 x double> @llvm.minimumnum.nxv8f64(<8 x double>, <8 x double>)
+declare <16 x double> @llvm.minimumnum.nxv16f64(<16 x double>, <16 x double>)
diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll
index 829ce127493ed..73574dd61d75c 100644
--- a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll
+++ b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll
@@ -255,6 +255,60 @@ define void @maximum() {
ret void
}
+define void @minimumnum() {
+; CHECK-LABEL: 'minimumnum'
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call <vscale x 2 x float> @llvm.minimumnum.nxv2f32(<vscale x 2 x float> poison, <vscale x 2 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> poison, <vscale x 4 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call <vscale x 8 x float> @llvm.minimumnum.nxv8f32(<vscale x 8 x float> poison, <vscale x 8 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call <vscale x 16 x float> @llvm.minimumnum.nxv16f32(<vscale x 16 x float> poison, <vscale x 16 x float> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %5 = call <vscale x 2 x double> @llvm.minimumnum.nxv2f64(<vscale x 2 x double> poison, <vscale x 2 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %6 = call <vscale x 4 x double> @llvm.minimumnum.nxv4f64(<vscale x 4 x double> poison, <vscale x 4 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %7 = call <vscale x 8 x double> @llvm.minimumnum.nxv8f64(<vscale x 8 x double> poison, <vscale x 8 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %8 = call <vscale x 16 x double> @llvm.minimumnum.nxv16f64(<vscale x 16 x double> poison, <vscale x 16 x double> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call <vscale x 2 x float> @llvm.minimumnum(<vscale x 2 x float> poison, <vscale x 2 x float> poison)
+ call <vscale x 4 x float> @llvm.minimumnum(<vscale x 4 x float> poison, <vscale x 4 x float> poison)
+ call <vscale x 8 x float> @llvm.minimumnum(<vscale x 8 x float> poison, <vscale x 8 x float> poison)
+ call <vscale x 16 x float> @llvm.minimumnum(<vscale x 16 x float> poison, <vscale x 16 x float> poison)
+ call <vscale x 2 x double> @llvm.minimumnum(<vscale x 2 x double> poison, <vscale x 2 x double> poison)
+ call <vscale x 4 x double> @llvm.minimumnum(<vscale x 4 x double> poison, <vscale x 4 x double> poison)
+ call <vscale x 8 x double> @llvm.minimumnum(<vscale x 8 x double> poison, <vscale x 8 x double> poison)
+ call <vscale x 16 x double> @llvm.minimumnum(<vscale x 16 x double> poison, <vscale x 16 x double> poison)
+ ret void
+}
+
+define void @minimumnum_fp16() {
+; CHECK-LABEL: 'minimumnum_fp16'
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call <vscale x 2 x half> @llvm.minimumnum.nxv2f16(<vscale x 2 x half> poison, <vscale x 2 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call <vscale x 4 x half> @llvm.minimumnum.nxv4f16(<vscale x 4 x half> poison, <vscale x 4 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call <vscale x 8 x half> @llvm.minimumnum.nxv8f16(<vscale x 8 x half> poison, <vscale x 8 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call <vscale x 16 x half> @llvm.minimumnum.nxv16f16(<vscale x 16 x half> poison, <vscale x 16 x half> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call <vscale x 2 x half> @llvm.minimumnum(<vscale x 2 x half> poison, <vscale x 2 x half> poison)
+ call <vscale x 4 x half> @llvm.minimumnum(<vscale x 4 x half> poison, <vscale x 4 x half> poison)
+ call <vscale x 8 x half> @llvm.minimumnum(<vscale x 8 x half> poison, <vscale x 8 x half> poison)
+ call <vscale x 16 x half> @llvm.minimumnum(<vscale x 16 x half> poison, <vscale x 16 x half> poison)
+ ret void
+}
+
+define void @minimumnum_bf16() {
+; CHECK-LABEL: 'minimumnum_bf16'
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call <vscale x 2 x bfloat> @llvm.minimumnum.nxv2bf16(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call <vscale x 4 x bfloat> @llvm.minimumnum.nxv4bf16(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call <vscale x 8 x bfloat> @llvm.minimumnum.nxv8bf16(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call <vscale x 16 x bfloat> @llvm.minimumnum.nxv16bf16(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void
+;
+ call <vscale x 2 x bfloat> @llvm.minimumnum(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison)
+ call <vscale x 4 x bfloat> @llvm.minimumnum(<vscale x 4 x bfloat> poison, <vscale x 4 x bfloat> poison)
+ call <vscale x 8 x bfloat> @llvm.minimumnum(<vscale x 8 x bfloat> poison, <vscale x 8 x bfloat> poison)
+ call <vscale x 16 x bfloat> @llvm.minimumnum(<vscale x 16 x bfloat> poison, <vscale x 16 x bfloat> poison)
+ ret void
+}
+
+
declare <vscale x 4 x i8> @llvm.umin.nxv4i8(<vscale x 4 x i8>, <vscale x 4 x i8>)
declare <vscale x 8 x i8> @llvm.umin.nxv8i8(<vscale x 8 x i8>, <vscale x 8 x i8>)
declare <vscale x 16 x i8> @llvm.umin.nxv16i8(<vscale x 16 x i8>, <vscale x 16 x i8>)
@@ -354,3 +408,20 @@ declare <vscale x 4 x float> @llvm.maximum.nxv4f32(<vscale x 4 x float>, <vscale
declare <vscale x 8 x float> @llvm.maximum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>)
declare <vscale x 2 x double> @llvm.maximum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
declare <vscale x 4 x double> @llvm.maximum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>)
+
+declare <vscale x 2 x half> @llvm.minimumnum.nxv2f16(<vscale x 2 x half>, <vscale x 2 x half>)
+declare <vscale x 4 x half> @llvm.minimumnum.nxv4f16(<vscale x 4 x half>, <vscale x 4 x half>)
+declare <vscale x 8 x half> @llvm.minimumnum.nxv8f16(<vscale x 8 x half>, <vscale x 8 x half>)
+declare <vscale x 16 x half> @llvm.minimumnum.nxv16f16(<vscale x 16 x half>, <vscale x 16 x half>)
+declare <vscale x 2 x bfloat> @llvm.minimumnum.nxv2bf16(<vscale x 2 x bfloat>, <vscale x 2 x bfloat>)
+declare <vscale x 4 x bfloat> @llvm.minimumnum.nxv4bf16(<vscale x 4 x bfloat>, <vscale x 4 x bfloat>)
+declare <vscale x 8 x bfloat> @llvm.minimumnum.nxv8bf16(<vscale x 8 x bfloat>, <vscale x 8 x bfloat>)
+declare <vscale x 16 x bfloat> @llvm.minimumnum.nxv16bf16(<vscale x 16 x bfloat>, <vscale x 16 x bfloat>)
+declare <vscale x 2 x float> @llvm.minimumnum.nxv2f32(<vscale x 2 x float>, <vscale x 2 x float>)
+declare <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float>, <vscale x 4 x float>)
+declare <vscale x 8 x float> @llvm.minimumnum.nxv8f32(<vscale x 8 x float>, <vscale x 8 x float>)
+declare <vscale x 16 x float> @llvm.minimumnum.nxv16f32(<vscale x 16 x float>, <vscale x 16 x float>)
+declare <vscale x 2 x double> @llvm.minimumnum.nxv2f64(<vscale x 2 x double>, <vscale x 2 x double>)
+declare <vscale x 4 x double> @llvm.minimumnum.nxv4f64(<vscale x 4 x double>, <vscale x 4 x double>)
+declare <vscale x 8 x double> @llvm.minimumnum.nxv8f64(<vscale x 8 x double>, <vscale x 8 x double>)
+declare <vscale x 16 x double> @llvm.minimumnum.nxv16f64(<vscale x 16 x double>, <vscale x 16 x double>)
diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll
new file mode 100644
index 0000000000000..2823712c9143a
--- /dev/null
+++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll
@@ -0,0 +1,62 @@
+; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5
+; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S \
+; RUN: -debug-only=loop-vectorize %s 2>&1 | FileCheck %s
+
+; FIXME: Hoisted vector code should be costed with scalable cost.
+; In this example, `<vscale x 4 x float> @llvm.minimumnum` has an invalid cost,
+; and hence should not be produced by LoopVectorize. Unfortunately, what is
+; actually costed is `<4 x float> @llvm.minimumnum`.
+
+; CHECK: LV: Found an estimated cost of 3 for VF 1 For instruction: %res = tail call float @llvm.minimumnum.f32(float 0.000000e+00, float 0.000000e+00)
+define void @cost_hoisted_vector_code(ptr %p) {
+; CHECK-LABEL: define void @cost_hoisted_vector_code(
+; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] {
+; CHECK-NEXT: [[ENTRY:.*:]]
+; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8
+; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]]
+; CHECK: [[VECTOR_PH]]:
+; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8
+; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 -1, [[TMP3]]
+; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 -1, [[N_MOD_VF]]
+; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8
+; CHECK-NEXT: [[TMP6:%.*]] = add i64 1, [[N_VEC]]
+; CHECK-NEXT: [[TMP7:%.*]] = call <vscale x 4 x float> @llvm.minimumnum.nxv4f32(<vscale x 4 x float> zeroinitializer, <vscale x 4 x float> zeroinitializer)
+; CHECK-NEXT: br label %[[VECTOR_BODY:.*]]
+; CHECK: [[VECTOR_BODY]]:
+; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ]
+; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]]
+; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64()
+; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4
+; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP10]]
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP7]], ptr [[TMP8]], align 4
+; CHECK-NEXT: store <vscale x 4 x float> [[TMP7]], ptr [[TMP11]], align 4
+; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]]
+; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]]
+; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]
+; CHECK: [[MIDDLE_BLOCK]]:
+; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 -1, [[N_VEC]]
+; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]]
+; CHECK: [[SCALAR_PH]]:
+;
+entry:
+ br label %loop
+
+loop: ; preds = %loop, %entry
+ %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ]
+ %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ]
+ %res = tail call float @llvm.minimumnum.f32(float 0.0, float 0.0)
+ %gep.p.red = getelementptr float, ptr %p, i64 %idx
+ store float %res, ptr %gep.p.red, align 4
+ %idx.next = add i64 %idx, 1
+ %iv.next = add i64 %iv, 1
+ %exit.cond = icmp eq i64 %iv.next, 0
+ br i1 %exit.cond, label %exit, label %loop
+
+exit: ; preds = %loop
+ ret void
+}
+
+declare float @llvm.minimumnum.f32(float, float)
|
davemgreen
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks for adding these cost-model tests, they look like a good addition.
llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll
Outdated
Show resolved
Hide resolved
llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll
Outdated
Show resolved
Hide resolved
davemgreen
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Thanks. LGTM
| call <vscale x 4 x half> @llvm.minimumnum(<vscale x 4 x half> poison, <vscale x 4 x half> poison) | ||
| call <vscale x 8 x half> @llvm.minimumnum(<vscale x 8 x half> poison, <vscale x 8 x half> poison) | ||
| call <vscale x 16 x half> @llvm.minimumnum(<vscale x 16 x half> poison, <vscale x 16 x half> poison) | ||
| call <vscale x 2 x bfloat> @llvm.minimumnum(<vscale x 2 x bfloat> poison, <vscale x 2 x bfloat> poison) |
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
BF16 is actually a different arch extension, but we can fix that later when we add proper costs for it.
|
LLVM Buildbot has detected a new failure on builder Full details are available at: https://lab.llvm.org/buildbot/#/builders/11/builds/20898 Here is the relevant piece of the build log for the reference |
|
Oops, forgot the |
Hoisted vector instructions are costed incorrectly.