From 4b7c0f536264c57834b7b858670a98a3319c663d Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Fri, 1 Aug 2025 09:25:12 +0100 Subject: [PATCH 1/4] [LV] Pre-commit test for #151664 Hoisted vector instructions are costed incorrectly. --- .../Analysis/CostModel/AArch64/min-max.ll | 77 +++++++++++++++++++ .../Analysis/CostModel/AArch64/sve-min-max.ll | 71 +++++++++++++++++ .../pr151664-cost-hoisted-vector-scalable.ll | 62 +++++++++++++++ 3 files changed, 210 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll index b824f5309adc1..3c052b1eeea82 100644 --- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll +++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll @@ -377,6 +377,66 @@ define void @maximum() { ret void } +define void @minimumnum() { +; CHECK-LABEL: 'minimumnum' +; CHECK-NEXT: Cost Model: Found costs of 3 for: %1 = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %2 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 10 for: %3 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 20 for: %4 = call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %5 = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> poison, <2 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 10 for: %6 = call <4 x double> @llvm.minimumnum.v4f64(<4 x double> poison, <4 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 20 for: %7 = call <8 x double> @llvm.minimumnum.v8f64(<8 x double> poison, <8 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 40 for: %8 = call <16 x double> 
@llvm.minimumnum.v16f64(<16 x double> poison, <16 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) + call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) + call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) + call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison) + call <2 x double> @llvm.minimumnum.v2f64(<2 x double> poison, <2 x double> poison) + call <4 x double> @llvm.minimumnum.v4f64(<4 x double> poison, <4 x double> poison) + call <8 x double> @llvm.minimumnum.v8f64(<8 x double> poison, <8 x double> poison) + call <16 x double> @llvm.minimumnum.v16f64(<16 x double> poison, <16 x double> poison) + ret void +} + +define void @minimumnum_fp16() { +; CHECK-NOF16-LABEL: 'minimumnum_fp16' +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %1 = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %2 = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %3 = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %4 = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-F16-LABEL: 'minimumnum_fp16' +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %1 = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %2 = call <4 x half> @llvm.minimumnum.v4f16(<4 x 
half> poison, <4 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %3 = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 10 for: %4 = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison) + call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison) + call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison) + call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison) + ret void +} + +define void @minimumnum_bf16() { +; CHECK-LABEL: 'minimumnum_bf16' +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %1 = call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %2 = call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %3 = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %4 = call <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call <2 x bfloat> @llvm.minimumnum.v2fb16(<2 x bfloat> poison, <2 x bfloat> poison) + call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) + call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) + call <16 x bfloat> @llvm.minimumnum.v6bf16(<16 x bfloat> poison, <16 x bfloat> poison) + ret void +} + declare i8 @llvm.umin.i8(i8, i8) 
declare i16 @llvm.umin.i16(i16, i16) declare i32 @llvm.umin.i32(i32, i32) @@ -512,3 +572,20 @@ declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>) declare <8 x float> @llvm.maximum.v8f32(<8 x float>, <8 x float>) declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>) + +declare <2 x half> @llvm.minimumnum.v2f16(<2 x half>, <2 x half>) +declare <4 x half> @llvm.minimumnum.v4f16(<4 x half>, <4 x half>) +declare <8 x half> @llvm.minimumnum.v8f16(<8 x half>, <8 x half>) +declare <16 x half> @llvm.minimumnum.v16f16(<16 x half>, <16 x half>) +declare <2 x bfloat> @llvm.minimumnum.2bf16(<2 x bfloat>, <2 x bfloat>) +declare <4 x bfloat> @llvm.minimumnum.4bf16(<4 x bfloat>, <4 x bfloat>) +declare <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat>, <8 x bfloat>) +declare <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat>, <16 x bfloat>) +declare <2 x float> @llvm.minimumnum.v2f32(<2 x float>, <2 x float>) +declare <4 x float> @llvm.minimumnum.v4f32(<4 x float>, <4 x float>) +declare <8 x float> @llvm.minimumnum.v8f32(<8 x float>, <8 x float>) +declare <16 x float> @llvm.minimumnum.v16f32(<16 x float>, <16 x float>) +declare <2 x double> @llvm.minimumnum.v2f64(<2 x double>, <2 x double>) +declare <4 x double> @llvm.minimumnum.v4f64(<4 x double>, <4 x double>) +declare <8 x double> @llvm.minimumnum.nxv8f64(<8 x double>, <8 x double>) +declare <16 x double> @llvm.minimumnum.nxv16f64(<16 x double>, <16 x double>) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll index 829ce127493ed..73574dd61d75c 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll @@ -255,6 +255,60 @@ define void @maximum() { ret void } +define void @minimumnum() { +; CHECK-LABEL: 'minimumnum' +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call 
@llvm.minimumnum.nxv2f32( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call @llvm.minimumnum.nxv4f32( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call @llvm.minimumnum.nxv8f32( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call @llvm.minimumnum.nxv16f32( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %5 = call @llvm.minimumnum.nxv2f64( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %6 = call @llvm.minimumnum.nxv4f64( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %7 = call @llvm.minimumnum.nxv8f64( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %8 = call @llvm.minimumnum.nxv16f64( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + ret void +} + +define void @minimumnum_fp16() { +; CHECK-LABEL: 'minimumnum_fp16' +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call @llvm.minimumnum.nxv2f16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call @llvm.minimumnum.nxv4f16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call @llvm.minimumnum.nxv8f16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call @llvm.minimumnum.nxv16f16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + ret void +} 
+ +define void @minimumnum_bf16() { +; CHECK-LABEL: 'minimumnum_bf16' +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call @llvm.minimumnum.nxv2bf16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call @llvm.minimumnum.nxv4bf16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call @llvm.minimumnum.nxv8bf16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call @llvm.minimumnum.nxv16bf16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + ret void +} + + declare @llvm.umin.nxv4i8(, ) declare @llvm.umin.nxv8i8(, ) declare @llvm.umin.nxv16i8(, ) @@ -354,3 +408,20 @@ declare @llvm.maximum.nxv4f32(, @llvm.maximum.nxv8f32(, ) declare @llvm.maximum.nxv2f64(, ) declare @llvm.maximum.nxv4f64(, ) + +declare @llvm.minimumnum.nxv2f16(, ) +declare @llvm.minimumnum.nxv4f16(, ) +declare @llvm.minimumnum.nxv8f16(, ) +declare @llvm.minimumnum.nxv16f16(, ) +declare @llvm.minimumnum.nxv2bf16(, ) +declare @llvm.minimumnum.nxv4bf16(, ) +declare @llvm.minimumnum.nxv8bf16(, ) +declare @llvm.minimumnum.nxv16bf16(, ) +declare @llvm.minimumnum.nxv2f32(, ) +declare @llvm.minimumnum.nxv4f32(, ) +declare @llvm.minimumnum.nxv8f32(, ) +declare @llvm.minimumnum.nxv16f32(, ) +declare @llvm.minimumnum.nxv2f64(, ) +declare @llvm.minimumnum.nxv4f64(, ) +declare @llvm.minimumnum.nxv8f64(, ) +declare @llvm.minimumnum.nxv16f64(, ) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll new file mode 100644 index 0000000000000..2823712c9143a --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll @@ -0,0 +1,62 @@ +; 
NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --filter-out-after "^scalar.ph" --version 5 +; RUN: opt -passes=loop-vectorize -mtriple=aarch64 -mattr=+sve -S \ +; RUN: -debug-only=loop-vectorize %s 2>&1 | FileCheck %s + +; FIXME: Hoisted vector code should be costed with scalable cost. +; In this example, ` @llvm.minimumnum` has an invalid cost, +; and hence should not be produced by LoopVectorize. Unfortunately, what is +; actually costed is `<4 x float> @llvm.minimumnum`. + +; CHECK: LV: Found an estimated cost of 3 for VF 1 For instruction: %res = tail call float @llvm.minimumnum.f32(float 0.000000e+00, float 0.000000e+00) +define void @cost_hoisted_vector_code(ptr %p) { +; CHECK-LABEL: define void @cost_hoisted_vector_code( +; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[ENTRY:.*:]] +; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: [[TMP2:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP3:%.*]] = mul nuw i64 [[TMP2]], 8 +; CHECK-NEXT: [[N_MOD_VF:%.*]] = urem i64 -1, [[TMP3]] +; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 -1, [[N_MOD_VF]] +; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 +; CHECK-NEXT: [[TMP6:%.*]] = add i64 1, [[N_VEC]] +; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.minimumnum.nxv4f32( zeroinitializer, zeroinitializer) +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]] +; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() +; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 +; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP8]], 
i64 [[TMP10]] +; CHECK-NEXT: store [[TMP7]], ptr [[TMP8]], align 4 +; CHECK-NEXT: store [[TMP7]], ptr [[TMP11]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] +; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: [[CMP_N:%.*]] = icmp eq i64 -1, [[N_VEC]] +; CHECK-NEXT: br i1 [[CMP_N]], [[EXIT:label %.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; +entry: + br label %loop + +loop: ; preds = %loop, %entry + %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] + %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] + %res = tail call float @llvm.minimumnum.f32(float 0.0, float 0.0) + %gep.p.red = getelementptr float, ptr %p, i64 %idx + store float %res, ptr %gep.p.red, align 4 + %idx.next = add i64 %idx, 1 + %iv.next = add i64 %iv, 1 + %exit.cond = icmp eq i64 %iv.next, 0 + br i1 %exit.cond, label %exit, label %loop + +exit: ; preds = %loop + ret void +} + +declare float @llvm.minimumnum.f32(float, float) From e44e014113ce1eb281d583e9cdc0141bc56c5630 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Fri, 1 Aug 2025 10:12:12 +0100 Subject: [PATCH 2/4] [CostModel, LV] Address review --- .../Analysis/CostModel/AArch64/min-max.ll | 101 +++++++++++++----- .../Analysis/CostModel/AArch64/sve-min-max.ll | 86 ++++++++------- .../pr151664-cost-hoisted-vector-scalable.ll | 12 ++- 3 files changed, 132 insertions(+), 67 deletions(-) diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll index 3c052b1eeea82..9b0ae9e55f448 100644 --- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll +++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll @@ -379,16 +379,20 @@ define void @maximum() { define void @minimumnum() { ; CHECK-LABEL: 'minimumnum' -; CHECK-NEXT: Cost Model: Found costs of 3 for: %1 = call <2 x float> 
@llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) -; CHECK-NEXT: Cost Model: Found costs of 3 for: %2 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) -; CHECK-NEXT: Cost Model: Found costs of 10 for: %3 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) -; CHECK-NEXT: Cost Model: Found costs of 20 for: %4 = call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison) -; CHECK-NEXT: Cost Model: Found costs of 3 for: %5 = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> poison, <2 x double> poison) -; CHECK-NEXT: Cost Model: Found costs of 10 for: %6 = call <4 x double> @llvm.minimumnum.v4f64(<4 x double> poison, <4 x double> poison) -; CHECK-NEXT: Cost Model: Found costs of 20 for: %7 = call <8 x double> @llvm.minimumnum.v8f64(<8 x double> poison, <8 x double> poison) -; CHECK-NEXT: Cost Model: Found costs of 40 for: %8 = call <16 x double> @llvm.minimumnum.v16f64(<16 x double> poison, <16 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %1 = call float @llvm.minimumnum.f32(float poison, float poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %2 = call double @llvm.minimumnum.f64(double poison, double poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %3 = call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %4 = call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 10 for: %5 = call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 20 for: %6 = call <16 x float> @llvm.minimumnum.v16f32(<16 x float> poison, <16 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %7 = call <2 x double> @llvm.minimumnum.v2f64(<2 x double> poison, <2 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 10 for: %8 = call <4 
x double> @llvm.minimumnum.v4f64(<4 x double> poison, <4 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 20 for: %9 = call <8 x double> @llvm.minimumnum.v8f64(<8 x double> poison, <8 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 40 for: %10 = call <16 x double> @llvm.minimumnum.v16f64(<16 x double> poison, <16 x double> poison) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; + call float @llvm.minimumnum.f32(float poison, float poison) + call double @llvm.minimumnum.f64(double poison, double poison) call <2 x float> @llvm.minimumnum.v2f32(<2 x float> poison, <2 x float> poison) call <4 x float> @llvm.minimumnum.v4f32(<4 x float> poison, <4 x float> poison) call <8 x float> @llvm.minimumnum.v8f32(<8 x float> poison, <8 x float> poison) @@ -437,6 +441,70 @@ define void @minimumnum_bf16() { ret void } +define void @maximumnum() { +; CHECK-LABEL: 'maximumnum' +; CHECK-NEXT: Cost Model: Found costs of 3 for: %1 = call float @llvm.maximumnum.f32(float poison, float poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %2 = call double @llvm.maximumnum.f64(double poison, double poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %3 = call <2 x float> @llvm.maximumnum.v2f32(<2 x float> poison, <2 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %4 = call <4 x float> @llvm.maximumnum.v4f32(<4 x float> poison, <4 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 10 for: %5 = call <8 x float> @llvm.maximumnum.v8f32(<8 x float> poison, <8 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 20 for: %6 = call <16 x float> @llvm.maximumnum.v16f32(<16 x float> poison, <16 x float> poison) +; CHECK-NEXT: Cost Model: Found costs of 3 for: %7 = call <2 x double> @llvm.maximumnum.v2f64(<2 x double> poison, <2 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 10 for: %8 = call <4 x double> @llvm.maximumnum.v4f64(<4 x double> poison, <4 x double> poison) +; 
CHECK-NEXT: Cost Model: Found costs of 20 for: %9 = call <8 x double> @llvm.maximumnum.v8f64(<8 x double> poison, <8 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of 40 for: %10 = call <16 x double> @llvm.maximumnum.v16f64(<16 x double> poison, <16 x double> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call float @llvm.maximumnum.f32(float poison, float poison) + call double @llvm.maximumnum.f64(double poison, double poison) + call <2 x float> @llvm.maximumnum.v2f32(<2 x float> poison, <2 x float> poison) + call <4 x float> @llvm.maximumnum.v4f32(<4 x float> poison, <4 x float> poison) + call <8 x float> @llvm.maximumnum.v8f32(<8 x float> poison, <8 x float> poison) + call <16 x float> @llvm.maximumnum.v16f32(<16 x float> poison, <16 x float> poison) + call <2 x double> @llvm.maximumnum.v2f64(<2 x double> poison, <2 x double> poison) + call <4 x double> @llvm.maximumnum.v4f64(<4 x double> poison, <4 x double> poison) + call <8 x double> @llvm.maximumnum.v8f64(<8 x double> poison, <8 x double> poison) + call <16 x double> @llvm.maximumnum.v16f64(<16 x double> poison, <16 x double> poison) + ret void +} + +define void @maximumnum_fp16() { +; CHECK-NOF16-LABEL: 'maximumnum_fp16' +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %1 = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %2 = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %3 = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %4 = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found 
costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; +; CHECK-F16-LABEL: 'maximumnum_fp16' +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %1 = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %2 = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %3 = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 10 for: %4 = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison) + call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison) + call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison) + call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison) + ret void +} + +define void @maximumnum_bf16() { +; CHECK-LABEL: 'maximumnum_bf16' +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %1 = call <2 x bfloat> @llvm.maximumnum.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %2 = call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %3 = call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %4 = call <16 x bfloat> @llvm.maximumnum.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; + call <2 x bfloat> @llvm.maximumnum.v2fb16(<2 x bfloat> 
poison, <2 x bfloat> poison) + call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) + call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) + call <16 x bfloat> @llvm.maximumnum.v6bf16(<16 x bfloat> poison, <16 x bfloat> poison) + ret void +} + declare i8 @llvm.umin.i8(i8, i8) declare i16 @llvm.umin.i16(i16, i16) declare i32 @llvm.umin.i32(i32, i32) @@ -572,20 +640,3 @@ declare <4 x float> @llvm.maximum.v4f32(<4 x float>, <4 x float>) declare <8 x float> @llvm.maximum.v8f32(<8 x float>, <8 x float>) declare <2 x double> @llvm.maximum.v2f64(<2 x double>, <2 x double>) declare <4 x double> @llvm.maximum.v4f64(<4 x double>, <4 x double>) - -declare <2 x half> @llvm.minimumnum.v2f16(<2 x half>, <2 x half>) -declare <4 x half> @llvm.minimumnum.v4f16(<4 x half>, <4 x half>) -declare <8 x half> @llvm.minimumnum.v8f16(<8 x half>, <8 x half>) -declare <16 x half> @llvm.minimumnum.v16f16(<16 x half>, <16 x half>) -declare <2 x bfloat> @llvm.minimumnum.2bf16(<2 x bfloat>, <2 x bfloat>) -declare <4 x bfloat> @llvm.minimumnum.4bf16(<4 x bfloat>, <4 x bfloat>) -declare <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat>, <8 x bfloat>) -declare <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat>, <16 x bfloat>) -declare <2 x float> @llvm.minimumnum.v2f32(<2 x float>, <2 x float>) -declare <4 x float> @llvm.minimumnum.v4f32(<4 x float>, <4 x float>) -declare <8 x float> @llvm.minimumnum.v8f32(<8 x float>, <8 x float>) -declare <16 x float> @llvm.minimumnum.v16f32(<16 x float>, <16 x float>) -declare <2 x double> @llvm.minimumnum.v2f64(<2 x double>, <2 x double>) -declare <4 x double> @llvm.minimumnum.v4f64(<4 x double>, <4 x double>) -declare <8 x double> @llvm.minimumnum.nxv8f64(<8 x double>, <8 x double>) -declare <16 x double> @llvm.minimumnum.nxv16f64(<16 x double>, <16 x double>) diff --git a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll index 
73574dd61d75c..5d1113388015b 100644 --- a/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll +++ b/llvm/test/Analysis/CostModel/AArch64/sve-min-max.ll @@ -266,6 +266,15 @@ define void @minimumnum() { ; CHECK-NEXT: Cost Model: Found costs of Invalid for: %7 = call @llvm.minimumnum.nxv8f64( poison, poison) ; CHECK-NEXT: Cost Model: Found costs of Invalid for: %8 = call @llvm.minimumnum.nxv16f64( poison, poison) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %10 = call @llvm.minimumnum.nxv2f16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %11 = call @llvm.minimumnum.nxv4f16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %12 = call @llvm.minimumnum.nxv8f16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %13 = call @llvm.minimumnum.nxv16f16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %14 = call @llvm.minimumnum.nxv2bf16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %15 = call @llvm.minimumnum.nxv4bf16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %16 = call @llvm.minimumnum.nxv8bf16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %17 = call @llvm.minimumnum.nxv16bf16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; call @llvm.minimumnum( poison, poison) call @llvm.minimumnum( poison, poison) @@ -276,35 +285,55 @@ define void @minimumnum() { call @llvm.minimumnum( poison, poison) call @llvm.minimumnum( poison, poison) ret void -} - -define void @minimumnum_fp16() { -; CHECK-LABEL: 'minimumnum_fp16' -; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call @llvm.minimumnum.nxv2f16( poison, poison) -; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call @llvm.minimumnum.nxv4f16( poison, poison) -; CHECK-NEXT: Cost Model: 
Found costs of Invalid for: %3 = call @llvm.minimumnum.nxv8f16( poison, poison) -; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call @llvm.minimumnum.nxv16f16( poison, poison) -; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void -; call @llvm.minimumnum( poison, poison) call @llvm.minimumnum( poison, poison) call @llvm.minimumnum( poison, poison) call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) + call @llvm.minimumnum( poison, poison) ret void } -define void @minimumnum_bf16() { -; CHECK-LABEL: 'minimumnum_bf16' -; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call @llvm.minimumnum.nxv2bf16( poison, poison) -; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call @llvm.minimumnum.nxv4bf16( poison, poison) -; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call @llvm.minimumnum.nxv8bf16( poison, poison) -; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call @llvm.minimumnum.nxv16bf16( poison, poison) +define void @maximumnum() { +; CHECK-LABEL: 'maximumnum' +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %1 = call @llvm.maximumnum.nxv2f32( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %2 = call @llvm.maximumnum.nxv4f32( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %3 = call @llvm.maximumnum.nxv8f32( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %4 = call @llvm.maximumnum.nxv16f32( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %5 = call @llvm.maximumnum.nxv2f64( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %6 = call @llvm.maximumnum.nxv4f64( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %7 = call @llvm.maximumnum.nxv8f64( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %8 = call 
@llvm.maximumnum.nxv16f64( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %10 = call @llvm.maximumnum.nxv2f16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %11 = call @llvm.maximumnum.nxv4f16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %12 = call @llvm.maximumnum.nxv8f16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %13 = call @llvm.maximumnum.nxv16f16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %14 = call @llvm.maximumnum.nxv2bf16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %15 = call @llvm.maximumnum.nxv4bf16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %16 = call @llvm.maximumnum.nxv8bf16( poison, poison) +; CHECK-NEXT: Cost Model: Found costs of Invalid for: %17 = call @llvm.maximumnum.nxv16bf16( poison, poison) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; - call @llvm.minimumnum( poison, poison) - call @llvm.minimumnum( poison, poison) - call @llvm.minimumnum( poison, poison) - call @llvm.minimumnum( poison, poison) + call @llvm.maximumnum( poison, poison) + call @llvm.maximumnum( poison, poison) + call @llvm.maximumnum( poison, poison) + call @llvm.maximumnum( poison, poison) + call @llvm.maximumnum( poison, poison) + call @llvm.maximumnum( poison, poison) + call @llvm.maximumnum( poison, poison) + call @llvm.maximumnum( poison, poison) + ret void + call @llvm.maximumnum( poison, poison) + call @llvm.maximumnum( poison, poison) + call @llvm.maximumnum( poison, poison) + call @llvm.maximumnum( poison, poison) + call @llvm.maximumnum( poison, poison) + call @llvm.maximumnum( poison, poison) + call @llvm.maximumnum( poison, poison) + call @llvm.maximumnum( poison, poison) ret void } @@ -408,20 +437,3 @@ declare @llvm.maximum.nxv4f32(, 
@llvm.maximum.nxv8f32(, ) declare @llvm.maximum.nxv2f64(, ) declare @llvm.maximum.nxv4f64(, ) - -declare @llvm.minimumnum.nxv2f16(, ) -declare @llvm.minimumnum.nxv4f16(, ) -declare @llvm.minimumnum.nxv8f16(, ) -declare @llvm.minimumnum.nxv16f16(, ) -declare @llvm.minimumnum.nxv2bf16(, ) -declare @llvm.minimumnum.nxv4bf16(, ) -declare @llvm.minimumnum.nxv8bf16(, ) -declare @llvm.minimumnum.nxv16bf16(, ) -declare @llvm.minimumnum.nxv2f32(, ) -declare @llvm.minimumnum.nxv4f32(, ) -declare @llvm.minimumnum.nxv8f32(, ) -declare @llvm.minimumnum.nxv16f32(, ) -declare @llvm.minimumnum.nxv2f64(, ) -declare @llvm.minimumnum.nxv4f64(, ) -declare @llvm.minimumnum.nxv8f64(, ) -declare @llvm.minimumnum.nxv16f64(, ) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll index 2823712c9143a..5785a5e3dd6a0 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll @@ -7,10 +7,10 @@ ; and hence should not be produced by LoopVectorize. Unfortunately, what is ; actually costed is `<4 x float> @llvm.minimumnum`. 
-; CHECK: LV: Found an estimated cost of 3 for VF 1 For instruction: %res = tail call float @llvm.minimumnum.f32(float 0.000000e+00, float 0.000000e+00) -define void @cost_hoisted_vector_code(ptr %p) { +; CHECK: LV: Found an estimated cost of 3 for VF 1 For instruction: %res = tail call float @llvm.minimumnum.f32(float %arg, float 0.000000e+00) +define void @cost_hoisted_vector_code(ptr %p, float %arg) { ; CHECK-LABEL: define void @cost_hoisted_vector_code( -; CHECK-SAME: ptr [[P:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ptr [[P:%.*]], float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[ENTRY:.*:]] ; CHECK-NEXT: [[TMP0:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP1:%.*]] = mul nuw i64 [[TMP0]], 8 @@ -22,8 +22,10 @@ define void @cost_hoisted_vector_code(ptr %p) { ; CHECK-NEXT: [[N_VEC:%.*]] = sub i64 -1, [[N_MOD_VF]] ; CHECK-NEXT: [[TMP4:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP5:%.*]] = mul nuw i64 [[TMP4]], 8 +; CHECK-NEXT: [[BROADCAST_SPLATINSERT:%.*]] = insertelement poison, float [[ARG]], i64 0 +; CHECK-NEXT: [[BROADCAST_SPLAT:%.*]] = shufflevector [[BROADCAST_SPLATINSERT]], poison, zeroinitializer ; CHECK-NEXT: [[TMP6:%.*]] = add i64 1, [[N_VEC]] -; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.minimumnum.nxv4f32( zeroinitializer, zeroinitializer) +; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.minimumnum.nxv4f32( [[BROADCAST_SPLAT]], zeroinitializer) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] @@ -47,7 +49,7 @@ entry: loop: ; preds = %loop, %entry %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] - %res = tail call float @llvm.minimumnum.f32(float 0.0, float 0.0) + %res = tail call float @llvm.minimumnum.f32(float %arg, float 0.0) %gep.p.red = getelementptr float, ptr %p, i64 %idx store float %res, ptr %gep.p.red, align 4 %idx.next = add i64 %idx, 1 From 
bde905f37fccac48869cb94cfc15f23538d0c45f Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Fri, 1 Aug 2025 10:24:32 +0100 Subject: [PATCH 3/4] [LV] Strip duplicate iv --- .../AArch64/pr151664-cost-hoisted-vector-scalable.ll | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll index 5785a5e3dd6a0..bdff766110238 100644 --- a/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll +++ b/llvm/test/Transforms/LoopVectorize/AArch64/pr151664-cost-hoisted-vector-scalable.ll @@ -7,7 +7,6 @@ ; and hence should not be produced by LoopVectorize. Unfortunately, what is ; actually costed is `<4 x float> @llvm.minimumnum`. -; CHECK: LV: Found an estimated cost of 3 for VF 1 For instruction: %res = tail call float @llvm.minimumnum.f32(float %arg, float 0.000000e+00) define void @cost_hoisted_vector_code(ptr %p, float %arg) { ; CHECK-LABEL: define void @cost_hoisted_vector_code( ; CHECK-SAME: ptr [[P:%.*]], float [[ARG:%.*]]) #[[ATTR0:[0-9]+]] { @@ -28,14 +27,15 @@ define void @cost_hoisted_vector_code(ptr %p, float %arg) { ; CHECK-NEXT: [[TMP7:%.*]] = call @llvm.minimumnum.nxv4f32( [[BROADCAST_SPLAT]], zeroinitializer) ; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] ; CHECK: [[VECTOR_BODY]]: -; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX1:%.*]] = phi i64 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[INDEX:%.*]] = add i64 1, [[INDEX1]] ; CHECK-NEXT: [[TMP8:%.*]] = getelementptr float, ptr [[P]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP9:%.*]] = call i64 @llvm.vscale.i64() ; CHECK-NEXT: [[TMP10:%.*]] = mul nuw i64 [[TMP9]], 4 ; CHECK-NEXT: [[TMP11:%.*]] = getelementptr float, ptr [[TMP8]], i64 [[TMP10]] ; CHECK-NEXT: store [[TMP7]], ptr [[TMP8]], align 4 ; 
CHECK-NEXT: store [[TMP7]], ptr [[TMP11]], align 4 -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], [[TMP5]] +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX1]], [[TMP5]] ; CHECK-NEXT: [[TMP12:%.*]] = icmp eq i64 [[INDEX_NEXT]], [[N_VEC]] ; CHECK-NEXT: br i1 [[TMP12]], label %[[MIDDLE_BLOCK:.*]], label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] ; CHECK: [[MIDDLE_BLOCK]]: @@ -48,11 +48,9 @@ entry: loop: ; preds = %loop, %entry %iv = phi i64 [ 1, %entry ], [ %iv.next, %loop ] - %idx = phi i64 [ 0, %entry ], [ %idx.next, %loop ] %res = tail call float @llvm.minimumnum.f32(float %arg, float 0.0) - %gep.p.red = getelementptr float, ptr %p, i64 %idx + %gep.p.red = getelementptr float, ptr %p, i64 %iv store float %res, ptr %gep.p.red, align 4 - %idx.next = add i64 %idx, 1 %iv.next = add i64 %iv, 1 %exit.cond = icmp eq i64 %iv.next, 0 br i1 %exit.cond, label %exit, label %loop From f597ce63530450e0864c769dd722bb7e3dfaad31 Mon Sep 17 00:00:00 2001 From: Ramkumar Ramachandra Date: Fri, 1 Aug 2025 11:40:19 +0100 Subject: [PATCH 4/4] [CostModel] Add missing scalar tests --- .../Analysis/CostModel/AArch64/min-max.ll | 58 +++++++++++-------- 1 file changed, 34 insertions(+), 24 deletions(-) diff --git a/llvm/test/Analysis/CostModel/AArch64/min-max.ll b/llvm/test/Analysis/CostModel/AArch64/min-max.ll index 9b0ae9e55f448..a579eb37ff22a 100644 --- a/llvm/test/Analysis/CostModel/AArch64/min-max.ll +++ b/llvm/test/Analysis/CostModel/AArch64/min-max.ll @@ -406,19 +406,22 @@ define void @minimumnum() { define void @minimumnum_fp16() { ; CHECK-NOF16-LABEL: 'minimumnum_fp16' -; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %1 = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison) -; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %2 = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison) -; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:22 
CodeSize:15 Lat:22 SizeLat:22 for: %3 = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison) -; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %4 = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of 1 for: %1 = call half @llvm.minimumnum.f16(half poison, half poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %2 = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %3 = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %4 = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %5 = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison) ; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-F16-LABEL: 'minimumnum_fp16' -; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %1 = call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison) -; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %2 = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison) -; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %3 = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison) -; CHECK-F16-NEXT: Cost Model: Found costs of 10 for: %4 = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %1 = call half @llvm.minimumnum.f16(half poison, half poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %2 = call <2 x half> 
@llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %3 = call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %4 = call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 10 for: %5 = call <16 x half> @llvm.minimumnum.v16f16(<16 x half> poison, <16 x half> poison) ; CHECK-F16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; + call half @llvm.minimumnum.f16(half poison, half poison) call <2 x half> @llvm.minimumnum.v2f16(<2 x half> poison, <2 x half> poison) call <4 x half> @llvm.minimumnum.v4f16(<4 x half> poison, <4 x half> poison) call <8 x half> @llvm.minimumnum.v8f16(<8 x half> poison, <8 x half> poison) @@ -428,12 +431,14 @@ define void @minimumnum_fp16() { define void @minimumnum_bf16() { ; CHECK-LABEL: 'minimumnum_bf16' -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %1 = call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison) -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %2 = call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) -; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %3 = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) -; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %4 = call <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of 1 for: %1 = call bfloat @llvm.minimumnum.bf16(bfloat poison, bfloat poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %2 = call <2 x bfloat> @llvm.minimumnum.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found
costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %3 = call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %4 = call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %5 = call <16 x bfloat> @llvm.minimumnum.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; + call bfloat @llvm.minimumnum.bf16(bfloat poison, bfloat poison) call <2 x bfloat> @llvm.minimumnum.v2fb16(<2 x bfloat> poison, <2 x bfloat> poison) call <4 x bfloat> @llvm.minimumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) call <8 x bfloat> @llvm.minimumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) @@ -470,19 +475,22 @@ define void @maximumnum() { define void @maximumnum_fp16() { ; CHECK-NOF16-LABEL: 'maximumnum_fp16' -; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %1 = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison) -; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %2 = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison) -; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %3 = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison) -; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %4 = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of 1 for: %1 = call half @llvm.maximumnum.f16(half poison, half poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %2 = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half>
poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %3 = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %4 = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison) +; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %5 = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison) ; CHECK-NOF16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; ; CHECK-F16-LABEL: 'maximumnum_fp16' -; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %1 = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison) -; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %2 = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison) -; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %3 = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison) -; CHECK-F16-NEXT: Cost Model: Found costs of 10 for: %4 = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %1 = call half @llvm.maximumnum.f16(half poison, half poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %2 = call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %3 = call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 3 for: %4 = call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison) +; CHECK-F16-NEXT: Cost Model: Found costs of 10 for: %5 = call <16 x half> @llvm.maximumnum.v16f16(<16 x half> poison, <16 x half> poison) ; CHECK-F16-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret void ; + call half 
@llvm.maximumnum.f16(half poison, half poison) call <2 x half> @llvm.maximumnum.v2f16(<2 x half> poison, <2 x half> poison) call <4 x half> @llvm.maximumnum.v4f16(<4 x half> poison, <4 x half> poison) call <8 x half> @llvm.maximumnum.v8f16(<8 x half> poison, <8 x half> poison) @@ -492,12 +500,14 @@ define void @maximumnum_fp16() { define void @maximumnum_bf16() { ; CHECK-LABEL: 'maximumnum_bf16' -; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %1 = call <2 x bfloat> @llvm.maximumnum.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison) -; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %2 = call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) -; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %3 = call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) -; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %4 = call <16 x bfloat> @llvm.maximumnum.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of 1 for: %1 = call bfloat @llvm.maximumnum.bf16(bfloat poison, bfloat poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:4 CodeSize:3 Lat:4 SizeLat:4 for: %2 = call <2 x bfloat> @llvm.maximumnum.v2bf16(<2 x bfloat> poison, <2 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:10 CodeSize:7 Lat:10 SizeLat:10 for: %3 = call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:22 CodeSize:15 Lat:22 SizeLat:22 for: %4 = call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison) +; CHECK-NEXT: Cost Model: Found costs of RThru:44 CodeSize:30 Lat:44 SizeLat:44 for: %5 = call <16 x bfloat> @llvm.maximumnum.v16bf16(<16 x bfloat> poison, <16 x bfloat> poison) ; CHECK-NEXT: Cost Model: Found costs of RThru:0 CodeSize:1 Lat:1 SizeLat:1 for: ret
void ; + call bfloat @llvm.maximumnum.bf16(bfloat poison, bfloat poison) call <2 x bfloat> @llvm.maximumnum.v2fb16(<2 x bfloat> poison, <2 x bfloat> poison) call <4 x bfloat> @llvm.maximumnum.v4bf16(<4 x bfloat> poison, <4 x bfloat> poison) call <8 x bfloat> @llvm.maximumnum.v8bf16(<8 x bfloat> poison, <8 x bfloat> poison)