From 084efd92871d5ac2f7dace668cb43986954defc4 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 5 Jun 2025 20:47:32 +0200 Subject: [PATCH 01/13] Prototype fix --- .../InstCombine/InstCombineCalls.cpp | 19 +++++++++++++++++++ llvm/test/Transforms/InstCombine/select.ll | 15 +++++++++++++++ 2 files changed, 34 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index cfb4af391b540..930819d24393a 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1654,6 +1654,25 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (Value *FreedOp = getFreedOperand(&CI, &TLI)) return visitFree(CI, FreedOp); + if (Function *F = CI.getCalledFunction()) { + if (F->getIntrinsicID() == Intrinsic::umin || F->getIntrinsicID() == Intrinsic::umax) { + for (Value *Arg : CI.args()) { + auto *SI = dyn_cast(Arg); + if (!SI) + continue; + + auto *TrueC = dyn_cast(SI->getTrueValue()); + auto *FalseC = dyn_cast(SI->getFalseValue()); + + // Block only if the select is masking, e.g. select(cond, val, -1) + if ((TrueC && TrueC->isAllOnesValue()) || (FalseC && FalseC->isAllOnesValue())) { + LLVM_DEBUG(dbgs() << "InstCombine: skipping umin/umax folding for masked select\n"); + return nullptr; + } + } + } + } + // If the caller function (i.e. us, the function that contains this CallInst) // is nounwind, mark the call as nounwind, even if the callee isn't. 
if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) { diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index e16f6ad2cfc9b..09cb84cde07ca 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -5047,3 +5047,18 @@ define <2 x ptr> @select_freeze_constant_expression_vector_gep(i1 %cond, <2 x pt %sel = select i1 %cond, <2 x ptr> %y, <2 x ptr> %freeze ret <2 x ptr> %sel } + +declare i8 @llvm.umin.i8(i8, i8) + +define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) { +; CHECK-LABEL: @no_fold_masked_min( +; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0 +; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND:%.*]], i8 [[VAL:%.*]], i8 -1 +; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL:%.*]]) +; CHECK-NEXT: ret i8 [[RES]] +; + %cond = icmp eq i8 %mask, 0 + %masked_val = select i1 %cond, i8 %val, i8 -1 + %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) + ret i8 %res +} From 1b3e66224eef8f475e4f6cf60865b2b10901a33d Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 5 Jun 2025 21:03:53 +0200 Subject: [PATCH 02/13] Fix formatting --- llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 930819d24393a..0e5c95c7445dd 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1655,7 +1655,8 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { return visitFree(CI, FreedOp); if (Function *F = CI.getCalledFunction()) { - if (F->getIntrinsicID() == Intrinsic::umin || F->getIntrinsicID() == Intrinsic::umax) { + if (F->getIntrinsicID() == Intrinsic::umin || + F->getIntrinsicID() == Intrinsic::umax) { for (Value *Arg : CI.args()) { auto *SI = 
dyn_cast(Arg); if (!SI) @@ -1665,8 +1666,11 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { auto *FalseC = dyn_cast(SI->getFalseValue()); // Block only if the select is masking, e.g. select(cond, val, -1) - if ((TrueC && TrueC->isAllOnesValue()) || (FalseC && FalseC->isAllOnesValue())) { - LLVM_DEBUG(dbgs() << "InstCombine: skipping umin/umax folding for masked select\n"); + if ((TrueC && TrueC->isAllOnesValue()) || + (FalseC && FalseC->isAllOnesValue())) { + LLVM_DEBUG( + dbgs() + << "InstCombine: skipping umin/umax folding for masked select\n"); return nullptr; } } From d87b193e4832ad2984df5a72d1e2aeda614662bc Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 5 Jun 2025 23:04:05 +0200 Subject: [PATCH 03/13] Move checks to FoldOpIntoSelect --- .../InstCombine/InstCombineCalls.cpp | 23 ------------------- .../InstCombine/InstructionCombining.cpp | 19 +++++++++++++++ 2 files changed, 19 insertions(+), 23 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp index 0e5c95c7445dd..cfb4af391b540 100644 --- a/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp +++ b/llvm/lib/Transforms/InstCombine/InstCombineCalls.cpp @@ -1654,29 +1654,6 @@ Instruction *InstCombinerImpl::visitCallInst(CallInst &CI) { if (Value *FreedOp = getFreedOperand(&CI, &TLI)) return visitFree(CI, FreedOp); - if (Function *F = CI.getCalledFunction()) { - if (F->getIntrinsicID() == Intrinsic::umin || - F->getIntrinsicID() == Intrinsic::umax) { - for (Value *Arg : CI.args()) { - auto *SI = dyn_cast(Arg); - if (!SI) - continue; - - auto *TrueC = dyn_cast(SI->getTrueValue()); - auto *FalseC = dyn_cast(SI->getFalseValue()); - - // Block only if the select is masking, e.g. 
select(cond, val, -1) - if ((TrueC && TrueC->isAllOnesValue()) || - (FalseC && FalseC->isAllOnesValue())) { - LLVM_DEBUG( - dbgs() - << "InstCombine: skipping umin/umax folding for masked select\n"); - return nullptr; - } - } - } - } - // If the caller function (i.e. us, the function that contains this CallInst) // is nounwind, mark the call as nounwind, even if the callee isn't. if (CI.getFunction()->doesNotThrow() && !CI.doesNotThrow()) { diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 439a86d951a83..29211b4ac0ad1 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1719,6 +1719,25 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, if (SI->getType()->isIntOrIntVectorTy(1)) return nullptr; + if (auto *II = dyn_cast<IntrinsicInst>(&Op)) { + switch (II->getIntrinsicID()) { + case Intrinsic::umin: + case Intrinsic::smin: + if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) + if (C->isAllOnesValue()) + return nullptr; + break; + case Intrinsic::umax: + case Intrinsic::smax: + if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) + if (C->isZero()) + return nullptr; + break; + default: + break; + } + } + // Test if a FCmpInst instruction is used exclusively by a select as // part of a minimum or maximum operation. If so, refrain from doing // any other folding. 
This helps out other analyses which understand From 69eaf81f28214760c080296c7aed23250668a6b9 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 5 Jun 2025 23:13:30 +0200 Subject: [PATCH 04/13] Fix formatting --- .../InstCombine/InstructionCombining.cpp | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 29211b4ac0ad1..53efd629ea19d 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1721,20 +1721,20 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, if (auto *II = dyn_cast<IntrinsicInst>(&Op)) { switch (II->getIntrinsicID()) { - case Intrinsic::umin: - case Intrinsic::smin: - if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) - if (C->isAllOnesValue()) - return nullptr; - break; - case Intrinsic::umax: - case Intrinsic::smax: - if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) - if (C->isZero()) - return nullptr; - break; - default: - break; + case Intrinsic::umin: + case Intrinsic::smin: + if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) + if (C->isAllOnesValue()) + return nullptr; + break; + case Intrinsic::umax: + case Intrinsic::smax: + if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) + if (C->isZero()) + return nullptr; + break; + default: + break; } } From 8240c6c8eedb3a499e4f3f0c457fd847e86da10e Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Sat, 7 Jun 2025 08:07:36 +0200 Subject: [PATCH 05/13] Apply suggested fix --- .../InstCombine/InstructionCombining.cpp | 24 ++++------------ llvm/test/Transforms/InstCombine/select.ll | 28 +++++++++---------- 2 files changed, 20 insertions(+), 32 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 53efd629ea19d..23a3c87a640e1 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ 
b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1719,24 +1719,12 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, if (SI->getType()->isIntOrIntVectorTy(1)) return nullptr; - if (auto *II = dyn_cast<IntrinsicInst>(&Op)) { - switch (II->getIntrinsicID()) { - case Intrinsic::umin: - case Intrinsic::smin: - if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) - if (C->isAllOnesValue()) - return nullptr; - break; - case Intrinsic::umax: - case Intrinsic::smax: - if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) - if (C->isZero()) - return nullptr; - break; - default: - break; - } - } + if (isa<MinMaxIntrinsic>(&Op)) + for (Value *IntrinOp : Op.operands()) + if (auto *PN = dyn_cast<PHINode>(IntrinOp)) + for (Value *PhiOp : PN->operands()) + if (PhiOp == &Op) + return nullptr; // Test if a FCmpInst instruction is used exclusively by a select as // part of a minimum or maximum operation. If so, refrain from doing diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 09cb84cde07ca..13464187e6e81 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -5048,17 +5048,17 @@ define <2 x ptr> @select_freeze_constant_expression_vector_gep(i1 %cond, <2 x pt ret <2 x ptr> %sel } -declare i8 @llvm.umin.i8(i8, i8) - -define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) { -; CHECK-LABEL: @no_fold_masked_min( -; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0 -; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND:%.*]], i8 [[VAL:%.*]], i8 -1 -; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL:%.*]]) -; CHECK-NEXT: ret i8 [[RES]] -; - %cond = icmp eq i8 %mask, 0 - %masked_val = select i1 %cond, i8 %val, i8 -1 - %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) - ret i8 %res -} +; declare i8 @llvm.umin.i8(i8, i8) +; +; define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) { +; ; CHECK-LABEL: @no_fold_masked_min( +; ; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 
[[MASK:%.*]], 0 +; ; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND:%.*]], i8 [[VAL:%.*]], i8 -1 +; ; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL:%.*]]) +; ; CHECK-NEXT: ret i8 [[RES]] +; ; +; %cond = icmp eq i8 %mask, 0 +; %masked_val = select i1 %cond, i8 %val, i8 -1 +; %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) +; ret i8 %res +; } From 8a7663b66339bfd3cf59bbbeb488b1dffd0fb533 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 11 Jun 2025 22:29:22 +0200 Subject: [PATCH 06/13] Update expected output --- .../PhaseOrdering/X86/vector-reductions.ll | 48 +++++++++++++++---- 1 file changed, 38 insertions(+), 10 deletions(-) diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index f8450766037b2..8ee3345a963a9 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -332,20 +332,48 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) { ; CHECK-LABEL: @masked_min_reduction( ; CHECK-NEXT: entry: ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] -; CHECK: loop: +; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[ACC:%.*]] = phi i8 [ -1, [[ENTRY]] ], [ [[TMP21:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <32 x i8> [ , [[ENTRY]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <32 x i8> [ , [[ENTRY]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <32 x i8> [ , [[ENTRY]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <32 x i8> [ , [[ENTRY]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[DATA:%.*]] = getelementptr i8, ptr [[DATA1:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[DATA]], align 1 +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr 
[[DATA]], i64 32 +; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DATA]], i64 64 +; CHECK-NEXT: [[TMP3:%.*]] = getelementptr i8, ptr [[DATA]], i64 96 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <32 x i8>, ptr [[DATA]], align 1 +; CHECK-NEXT: [[WIDE_LOAD4:%.*]] = load <32 x i8>, ptr [[TMP1]], align 1 +; CHECK-NEXT: [[WIDE_LOAD5:%.*]] = load <32 x i8>, ptr [[TMP2]], align 1 +; CHECK-NEXT: [[WIDE_LOAD6:%.*]] = load <32 x i8>, ptr [[TMP3]], align 1 ; CHECK-NEXT: [[TMP7:%.*]] = getelementptr i8, ptr [[MASK:%.*]], i64 [[INDEX]] -; CHECK-NEXT: [[M:%.*]] = load i8, ptr [[TMP7]], align 1 -; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[M]], 0 -; CHECK-NEXT: [[TMP0:%.*]] = tail call i8 @llvm.umin.i8(i8 [[ACC]], i8 [[VAL]]) -; CHECK-NEXT: [[TMP21]] = select i1 [[COND]], i8 [[TMP0]], i8 [[ACC]] -; CHECK-NEXT: [[INDEX_NEXT]] = add nuw nsw i64 [[INDEX]], 1 +; CHECK-NEXT: [[TMP5:%.*]] = getelementptr i8, ptr [[TMP7]], i64 32 +; CHECK-NEXT: [[TMP6:%.*]] = getelementptr i8, ptr [[TMP7]], i64 64 +; CHECK-NEXT: [[TMP22:%.*]] = getelementptr i8, ptr [[TMP7]], i64 96 +; CHECK-NEXT: [[WIDE_LOAD7:%.*]] = load <32 x i8>, ptr [[TMP7]], align 1 +; CHECK-NEXT: [[WIDE_LOAD8:%.*]] = load <32 x i8>, ptr [[TMP5]], align 1 +; CHECK-NEXT: [[WIDE_LOAD9:%.*]] = load <32 x i8>, ptr [[TMP6]], align 1 +; CHECK-NEXT: [[WIDE_LOAD10:%.*]] = load <32 x i8>, ptr [[TMP22]], align 1 +; CHECK-NEXT: [[TMP8:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD7]], zeroinitializer +; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD8]], zeroinitializer +; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD9]], zeroinitializer +; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD10]], zeroinitializer +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> +; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> +; CHECK-NEXT: [[TMP15:%.*]] = select 
<32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> +; CHECK-NEXT: [[TMP16]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[TMP12]]) +; CHECK-NEXT: [[TMP17]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[TMP13]]) +; CHECK-NEXT: [[TMP18]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[TMP14]]) +; CHECK-NEXT: [[TMP19]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[TMP15]]) +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 -; CHECK-NEXT: br i1 [[TMP20]], label [[EXIT:%.*]], label [[VECTOR_BODY]] -; CHECK: exit: +; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: middle.block: +; CHECK-NEXT: [[RDX_MINMAX:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[TMP16]], <32 x i8> [[TMP17]]) +; CHECK-NEXT: [[RDX_MINMAX11:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[RDX_MINMAX]], <32 x i8> [[TMP18]]) +; CHECK-NEXT: [[RDX_MINMAX12:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[RDX_MINMAX11]], <32 x i8> [[TMP19]]) +; CHECK-NEXT: [[TMP21:%.*]] = tail call i8 @llvm.vector.reduce.umin.v32i8(<32 x i8> [[RDX_MINMAX12]]) ; CHECK-NEXT: ret i8 [[TMP21]] ; entry: From 7929ed17610d2ff709392b9f3124f00ad274bede Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 11 Jun 2025 22:58:08 +0200 Subject: [PATCH 07/13] Add a comment --- llvm/lib/Transforms/InstCombine/InstructionCombining.cpp | 2 ++ 1 file changed, 2 insertions(+) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 23a3c87a640e1..291c738e94b85 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1719,6 +1719,8 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, 
SelectInst *SI, if (SI->getType()->isIntOrIntVectorTy(1)) return nullptr; + // Avoid breaking min/max reduction pattern, + // which is necessary for vectorization later. if (isa<MinMaxIntrinsic>(&Op)) for (Value *IntrinOp : Op.operands()) if (auto *PN = dyn_cast<PHINode>(IntrinOp)) From 22ef2528d9299e9b02ef5b5535f6c8900bdf377d Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 11 Jun 2025 23:00:38 +0200 Subject: [PATCH 08/13] Add a test --- llvm/test/Transforms/InstCombine/select.ll | 76 +++++++++++++++---- .../PhaseOrdering/X86/vector-reductions.ll | 2 - 2 files changed, 62 insertions(+), 16 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 13464187e6e81..937a5f0360a1d 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -5048,17 +5048,65 @@ define <2 x ptr> @select_freeze_constant_expression_vector_gep(i1 %cond, <2 x pt ret <2 x ptr> %sel } -; declare i8 @llvm.umin.i8(i8, i8) -; -; define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) { -; ; CHECK-LABEL: @no_fold_masked_min( -; ; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0 -; ; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND:%.*]], i8 [[VAL:%.*]], i8 -1 -; ; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL:%.*]]) -; ; CHECK-NEXT: ret i8 [[RES]] -; ; -; %cond = icmp eq i8 %mask, 0 -; %masked_val = select i1 %cond, i8 %val, i8 -1 -; %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) -; ret i8 %res -; } +declare i8 @llvm.umin.i8(i8, i8) + +define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) { +; CHECK-LABEL: @no_fold_masked_min( +; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0 +; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND]], i8 [[VAL:%.*]], i8 -1 +; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL]]) +; CHECK-NEXT: ret i8 [[RES]] +; + %cond = icmp eq i8 %mask, 0 + %masked_val = select i1 %cond, i8 %val, i8 
-1 + %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) + ret i8 %res +} + +define void @no_fold_masked_min_loop(ptr nocapture readonly %vals, ptr nocapture readonly %masks, ptr nocapture %out, i64 %n) { +; CHECK-LABEL: @no_fold_masked_min_loop( +; CHECK-NEXT: entry: +; CHECK-NEXT: br label [[LOOP:%.*]] +; CHECK: loop: +; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[NEXT_INDEX:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[ACC:%.*]] = phi i8 [ -1, [[ENTRY]] ], [ [[RES:%.*]], [[LOOP]] ] +; CHECK-NEXT: [[VAL_PTR:%.*]] = getelementptr inbounds i8, ptr [[VALS:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[MASK_PTR:%.*]] = getelementptr inbounds i8, ptr [[MASKS:%.*]], i64 [[INDEX]] +; CHECK-NEXT: [[VAL:%.*]] = load i8, ptr [[VAL_PTR]], align 1 +; CHECK-NEXT: [[MASK:%.*]] = load i8, ptr [[MASK_PTR]], align 1 +; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK]], 0 +; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND]], i8 [[VAL]], i8 -1 +; CHECK-NEXT: [[RES]] = call i8 @llvm.umin.i8(i8 [[ACC]], i8 [[MASKED_VAL]]) +; CHECK-NEXT: [[NEXT_INDEX]] = add i64 [[INDEX]], 1 +; CHECK-NEXT: [[DONE:%.*]] = icmp eq i64 [[NEXT_INDEX]], [[N:%.*]] +; CHECK-NEXT: br i1 [[DONE]], label [[EXIT:%.*]], label [[LOOP]] +; CHECK: exit: +; CHECK-NEXT: store i8 [[RES]], ptr [[OUT:%.*]], align 1 +; CHECK-NEXT: ret void +; + +entry: + br label %loop + +loop: + %index = phi i64 [0, %entry], [%next_index, %loop] + %acc = phi i8 [255, %entry], [%res, %loop] + + %val_ptr = getelementptr inbounds i8, ptr %vals, i64 %index + %mask_ptr = getelementptr inbounds i8, ptr %masks, i64 %index + + %val = load i8, ptr %val_ptr, align 1 + %mask = load i8, ptr %mask_ptr, align 1 + + %cond = icmp eq i8 %mask, 0 + %masked_val = select i1 %cond, i8 %val, i8 -1 + %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) + + %next_index = add i64 %index, 1 + %done = icmp eq i64 %next_index, %n + br i1 %done, label %exit, label %loop + +exit: + store i8 %res, ptr %out, align 1 + ret void +} diff --git 
a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index 8ee3345a963a9..5e679e692fd8d 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -326,8 +326,6 @@ cleanup: ret i1 %retval.0 } -; From https://github.com/llvm/llvm-project/issues/139050. -; FIXME: This should be vectorized. define i8 @masked_min_reduction(ptr %data, ptr %mask) { ; CHECK-LABEL: @masked_min_reduction( ; CHECK-NEXT: entry: From d424540814671d5cd83feed87fddcf467860119f Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 11 Jun 2025 23:30:21 +0200 Subject: [PATCH 09/13] Update tests assertions --- llvm/test/Transforms/InstCombine/select.ll | 1 - .../PhaseOrdering/X86/vector-reductions.ll | 16 ++++++++-------- 2 files changed, 8 insertions(+), 9 deletions(-) diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 937a5f0360a1d..67143bfe8f65c 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -5084,7 +5084,6 @@ define void @no_fold_masked_min_loop(ptr nocapture readonly %vals, ptr nocapture ; CHECK-NEXT: store i8 [[RES]], ptr [[OUT:%.*]], align 1 ; CHECK-NEXT: ret void ; - entry: br label %loop diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index 5e679e692fd8d..2ec48a8637dae 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -332,10 +332,10 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) { ; CHECK-NEXT: br label [[VECTOR_BODY:%.*]] ; CHECK: vector.body: ; CHECK-NEXT: [[INDEX:%.*]] = phi i64 [ 0, [[ENTRY:%.*]] ], [ [[INDEX_NEXT:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <32 x i8> [ , [[ENTRY]] ], [ [[TMP16:%.*]], 
[[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <32 x i8> [ , [[ENTRY]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <32 x i8> [ , [[ENTRY]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] -; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <32 x i8> [ , [[ENTRY]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP16:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI1:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP17:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI2:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP18:%.*]], [[VECTOR_BODY]] ] +; CHECK-NEXT: [[VEC_PHI3:%.*]] = phi <32 x i8> [ splat (i8 -1), [[ENTRY]] ], [ [[TMP19:%.*]], [[VECTOR_BODY]] ] ; CHECK-NEXT: [[DATA:%.*]] = getelementptr i8, ptr [[DATA1:%.*]], i64 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i8, ptr [[DATA]], i64 32 ; CHECK-NEXT: [[TMP2:%.*]] = getelementptr i8, ptr [[DATA]], i64 64 @@ -356,10 +356,10 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD8]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD9]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD10]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> -; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> -; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> -; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> splat (i8 -1) +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> splat (i8 -1) +; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> splat (i8 
-1) +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> splat (i8 -1) ; CHECK-NEXT: [[TMP16]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[TMP12]]) ; CHECK-NEXT: [[TMP17]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[TMP13]]) ; CHECK-NEXT: [[TMP18]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[TMP14]]) From 304cf20e9def8081c7234f7d3552e4d42e8504e1 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 11 Jun 2025 23:44:41 +0200 Subject: [PATCH 10/13] Revert suggested fix --- .../InstCombine/InstructionCombining.cpp | 22 ++++++++++++++----- 1 file changed, 17 insertions(+), 5 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index 291c738e94b85..fa79b12b47bff 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1721,12 +1721,24 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, // Avoid breaking min/max reduction pattern, // which is necessary for vectorization later. - if (isa<MinMaxIntrinsic>(&Op)) - for (Value *IntrinOp : Op.operands()) - if (auto *PN = dyn_cast<PHINode>(IntrinOp)) - for (Value *PhiOp : PN->operands()) - if (PhiOp == &Op) + if (auto *II = dyn_cast<IntrinsicInst>(&Op)) { + switch (II->getIntrinsicID()) { + case Intrinsic::umin: + case Intrinsic::smin: + if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) + if (C->isAllOnesValue()) return nullptr; + break; + case Intrinsic::umax: + case Intrinsic::smax: + if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) + if (C->isZero()) + return nullptr; + break; + default: + break; + } + } // Test if a FCmpInst instruction is used exclusively by a select as // part of a minimum or maximum operation. 
If so, refrain from doing From 1165581c55aaf2a089e486665d95bc30c28ce0c2 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Wed, 11 Jun 2025 23:46:02 +0200 Subject: [PATCH 11/13] Update tests assertions --- .../PhaseOrdering/X86/vector-reductions.ll | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index 2ec48a8637dae..45632e878021e 100644 --- a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -356,14 +356,14 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD8]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD9]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD10]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> splat (i8 -1) -; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> splat (i8 -1) -; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> splat (i8 -1) -; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> splat (i8 -1) -; CHECK-NEXT: [[TMP16]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[TMP12]]) -; CHECK-NEXT: [[TMP17]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[TMP13]]) -; CHECK-NEXT: [[TMP18]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[TMP14]]) -; CHECK-NEXT: [[TMP19]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[TMP15]]) +; CHECK-NEXT: [[TMP12:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[WIDE_LOAD]]) +; CHECK-NEXT: [[TMP16]] = select <32 x i1> [[TMP8]], <32 x i8> 
[[TMP12]], <32 x i8> [[VEC_PHI]] +; CHECK-NEXT: [[TMP14:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[WIDE_LOAD4]]) +; CHECK-NEXT: [[TMP17]] = select <32 x i1> [[TMP9]], <32 x i8> [[TMP14]], <32 x i8> [[VEC_PHI1]] +; CHECK-NEXT: [[TMP23:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[WIDE_LOAD5]]) +; CHECK-NEXT: [[TMP18]] = select <32 x i1> [[TMP10]], <32 x i8> [[TMP23]], <32 x i8> [[VEC_PHI2]] +; CHECK-NEXT: [[TMP24:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[WIDE_LOAD6]]) +; CHECK-NEXT: [[TMP19]] = select <32 x i1> [[TMP11]], <32 x i8> [[TMP24]], <32 x i8> [[VEC_PHI3]] ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] From 0d48fc43c52bd59ec26bbe717433dc4e9bc6ec21 Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Thu, 12 Jun 2025 00:08:49 +0200 Subject: [PATCH 12/13] Reformat --- .../InstCombine/InstructionCombining.cpp | 28 +++++++++---------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index fa79b12b47bff..b51328f111b94 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1723,20 +1723,20 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, // which is necessary for vectorization later. 
if (auto *II = dyn_cast<IntrinsicInst>(&Op)) { switch (II->getIntrinsicID()) { - case Intrinsic::umin: - case Intrinsic::smin: - if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) - if (C->isAllOnesValue()) - return nullptr; - break; - case Intrinsic::umax: - case Intrinsic::smax: - if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) - if (C->isZero()) - return nullptr; - break; - default: - break; + case Intrinsic::umin: + case Intrinsic::smin: + if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) + if (C->isAllOnesValue()) + return nullptr; + break; + case Intrinsic::umax: + case Intrinsic::smax: + if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) + if (C->isZero()) + return nullptr; + break; + default: + break; } } From 22183ade298ed277eb555aebd0f64af398e94e9e Mon Sep 17 00:00:00 2001 From: Konstantin Bogdanov Date: Fri, 13 Jun 2025 12:49:18 +0200 Subject: [PATCH 13/13] Reapply suggested fix --- .../InstCombine/InstructionCombining.cpp | 24 +++++-------------- llvm/test/Transforms/InstCombine/select.ll | 15 ------------ .../PhaseOrdering/X86/vector-reductions.ll | 16 ++++++------- 3 files changed, 14 insertions(+), 41 deletions(-) diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp index b51328f111b94..291c738e94b85 100644 --- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1721,24 +1721,12 @@ Instruction *InstCombinerImpl::FoldOpIntoSelect(Instruction &Op, SelectInst *SI, // Avoid breaking min/max reduction pattern, // which is necessary for vectorization later. 
- if (auto *II = dyn_cast<IntrinsicInst>(&Op)) { - switch (II->getIntrinsicID()) { - case Intrinsic::umin: - case Intrinsic::smin: - if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) - if (C->isAllOnesValue()) - return nullptr; - break; - case Intrinsic::umax: - case Intrinsic::smax: - if (ConstantInt *C = dyn_cast<ConstantInt>(FV)) - if (C->isZero()) - return nullptr; - break; - default: - break; - } - } + if (isa<MinMaxIntrinsic>(&Op)) + for (Value *IntrinOp : Op.operands()) + if (auto *PN = dyn_cast<PHINode>(IntrinOp)) + for (Value *PhiOp : PN->operands()) + if (PhiOp == &Op) + return nullptr; // Test if a FCmpInst instruction is used exclusively by a select as // part of a minimum or maximum operation. If so, refrain from doing diff --git a/llvm/test/Transforms/InstCombine/select.ll b/llvm/test/Transforms/InstCombine/select.ll index 67143bfe8f65c..ef5874ffd46ad 100644 --- a/llvm/test/Transforms/InstCombine/select.ll +++ b/llvm/test/Transforms/InstCombine/select.ll @@ -5048,21 +5048,6 @@ define <2 x ptr> @select_freeze_constant_expression_vector_gep(i1 %cond, <2 x pt ret <2 x ptr> %sel } -declare i8 @llvm.umin.i8(i8, i8) - -define i8 @no_fold_masked_min(i8 %acc, i8 %val, i8 %mask) { -; CHECK-LABEL: @no_fold_masked_min( -; CHECK-NEXT: [[COND:%.*]] = icmp eq i8 [[MASK:%.*]], 0 -; CHECK-NEXT: [[MASKED_VAL:%.*]] = select i1 [[COND]], i8 [[VAL:%.*]], i8 -1 -; CHECK-NEXT: [[RES:%.*]] = call i8 @llvm.umin.i8(i8 [[ACC:%.*]], i8 [[MASKED_VAL]]) -; CHECK-NEXT: ret i8 [[RES]] -; - %cond = icmp eq i8 %mask, 0 - %masked_val = select i1 %cond, i8 %val, i8 -1 - %res = call i8 @llvm.umin.i8(i8 %acc, i8 %masked_val) - ret i8 %res -} - define void @no_fold_masked_min_loop(ptr nocapture readonly %vals, ptr nocapture readonly %masks, ptr nocapture %out, i64 %n) { ; CHECK-LABEL: @no_fold_masked_min_loop( ; CHECK-NEXT: entry: diff --git a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll index 45632e878021e..2ec48a8637dae 100644 --- 
a/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll +++ b/llvm/test/Transforms/PhaseOrdering/X86/vector-reductions.ll @@ -356,14 +356,14 @@ define i8 @masked_min_reduction(ptr %data, ptr %mask) { ; CHECK-NEXT: [[TMP9:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD8]], zeroinitializer ; CHECK-NEXT: [[TMP10:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD9]], zeroinitializer ; CHECK-NEXT: [[TMP11:%.*]] = icmp eq <32 x i8> [[WIDE_LOAD10]], zeroinitializer -; CHECK-NEXT: [[TMP12:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[WIDE_LOAD]]) -; CHECK-NEXT: [[TMP16]] = select <32 x i1> [[TMP8]], <32 x i8> [[TMP12]], <32 x i8> [[VEC_PHI]] -; CHECK-NEXT: [[TMP14:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[WIDE_LOAD4]]) -; CHECK-NEXT: [[TMP17]] = select <32 x i1> [[TMP9]], <32 x i8> [[TMP14]], <32 x i8> [[VEC_PHI1]] -; CHECK-NEXT: [[TMP23:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[WIDE_LOAD5]]) -; CHECK-NEXT: [[TMP18]] = select <32 x i1> [[TMP10]], <32 x i8> [[TMP23]], <32 x i8> [[VEC_PHI2]] -; CHECK-NEXT: [[TMP24:%.*]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[WIDE_LOAD6]]) -; CHECK-NEXT: [[TMP19]] = select <32 x i1> [[TMP11]], <32 x i8> [[TMP24]], <32 x i8> [[VEC_PHI3]] +; CHECK-NEXT: [[TMP12:%.*]] = select <32 x i1> [[TMP8]], <32 x i8> [[WIDE_LOAD]], <32 x i8> splat (i8 -1) +; CHECK-NEXT: [[TMP13:%.*]] = select <32 x i1> [[TMP9]], <32 x i8> [[WIDE_LOAD4]], <32 x i8> splat (i8 -1) +; CHECK-NEXT: [[TMP14:%.*]] = select <32 x i1> [[TMP10]], <32 x i8> [[WIDE_LOAD5]], <32 x i8> splat (i8 -1) +; CHECK-NEXT: [[TMP15:%.*]] = select <32 x i1> [[TMP11]], <32 x i8> [[WIDE_LOAD6]], <32 x i8> splat (i8 -1) +; CHECK-NEXT: [[TMP16]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI]], <32 x i8> [[TMP12]]) +; CHECK-NEXT: [[TMP17]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI1]], <32 x i8> [[TMP13]]) +; CHECK-NEXT: [[TMP18]] = tail call <32 
x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI2]], <32 x i8> [[TMP14]]) +; CHECK-NEXT: [[TMP19]] = tail call <32 x i8> @llvm.umin.v32i8(<32 x i8> [[VEC_PHI3]], <32 x i8> [[TMP15]]) ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i64 [[INDEX]], 128 ; CHECK-NEXT: [[TMP20:%.*]] = icmp eq i64 [[INDEX_NEXT]], 1024 ; CHECK-NEXT: br i1 [[TMP20]], label [[MIDDLE_BLOCK:%.*]], label [[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]]