From 4ad7b1c155b745f011825026ceb5354ecd236ad2 Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 7 Jul 2025 18:03:44 +0800 Subject: [PATCH 1/2] Precommit test --- .../LoopVectorize/select-neg-cond.ll | 63 +++++++++++++++++++ 1 file changed, 63 insertions(+) create mode 100644 llvm/test/Transforms/LoopVectorize/select-neg-cond.ll diff --git a/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll b/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll new file mode 100644 index 0000000000000..84c0ee877dc31 --- /dev/null +++ b/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll @@ -0,0 +1,63 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --check-globals none --version 5 +; RUN: opt -p loop-vectorize -force-vector-interleave=1 -force-vector-width=4 -S < %s | FileCheck %s + +define void @neg_cond(ptr noalias %p, ptr noalias %q) { +; CHECK-LABEL: define void @neg_cond( +; CHECK-SAME: ptr noalias [[P:%.*]], ptr noalias [[Q:%.*]]) { +; CHECK-NEXT: [[ENTRY:.*]]: +; CHECK-NEXT: br i1 false, label %[[SCALAR_PH:.*]], label %[[VECTOR_PH:.*]] +; CHECK: [[VECTOR_PH]]: +; CHECK-NEXT: br label %[[VECTOR_BODY:.*]] +; CHECK: [[VECTOR_BODY]]: +; CHECK-NEXT: [[INDEX:%.*]] = phi i32 [ 0, %[[VECTOR_PH]] ], [ [[INDEX_NEXT:%.*]], %[[VECTOR_BODY]] ] +; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P]], i32 [[INDEX]] +; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i32 0 +; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 42) +; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP2]], splat (i1 true) +; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> splat (i32 42), <4 x i32> splat (i32 43) +; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP1]], align 4 +; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4 +; CHECK-NEXT: [[TMP5:%.*]] = icmp eq i32 [[INDEX_NEXT]], 1024 +; CHECK-NEXT: br i1 [[TMP5]], label %[[MIDDLE_BLOCK:.*]], 
label %[[VECTOR_BODY]], !llvm.loop [[LOOP0:![0-9]+]] +; CHECK: [[MIDDLE_BLOCK]]: +; CHECK-NEXT: br i1 true, label %[[EXIT:.*]], label %[[SCALAR_PH]] +; CHECK: [[SCALAR_PH]]: +; CHECK-NEXT: [[BC_RESUME_VAL:%.*]] = phi i32 [ 1024, %[[MIDDLE_BLOCK]] ], [ 0, %[[ENTRY]] ] +; CHECK-NEXT: br label %[[LOOP:.*]] +; CHECK: [[LOOP]]: +; CHECK-NEXT: [[IV:%.*]] = phi i32 [ [[BC_RESUME_VAL]], %[[SCALAR_PH]] ], [ [[IV_NEXT:%.*]], %[[LOOP]] ] +; CHECK-NEXT: [[P_GEP:%.*]] = getelementptr i32, ptr [[P]], i32 [[IV]] +; CHECK-NEXT: [[X:%.*]] = load i32, ptr [[P_GEP]], align 4 +; CHECK-NEXT: [[Q_GEP:%.*]] = getelementptr i32, ptr [[Q]], i32 [[IV]] +; CHECK-NEXT: [[Y:%.*]] = load i32, ptr [[Q_GEP]], align 4 +; CHECK-NEXT: [[CMP:%.*]] = icmp eq i32 [[X]], 42 +; CHECK-NEXT: [[NOT:%.*]] = xor i1 [[CMP]], true +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[NOT]], i32 42, i32 43 +; CHECK-NEXT: store i32 [[SEL]], ptr [[P_GEP]], align 4 +; CHECK-NEXT: [[IV_NEXT]] = add i32 [[IV]], 1 +; CHECK-NEXT: [[DONE:%.*]] = icmp eq i32 [[IV_NEXT]], 1024 +; CHECK-NEXT: br i1 [[DONE]], label %[[EXIT]], label %[[LOOP]], !llvm.loop [[LOOP3:![0-9]+]] +; CHECK: [[EXIT]]: +; CHECK-NEXT: ret void +; +entry: + br label %loop + +loop: + %iv = phi i32 [0, %entry], [%iv.next, %loop] + %p.gep = getelementptr i32, ptr %p, i32 %iv + %x = load i32, ptr %p.gep + %q.gep = getelementptr i32, ptr %q, i32 %iv + %y = load i32, ptr %q.gep + %cmp = icmp eq i32 %x, 42 + %not = xor i1 %cmp, 1 + %sel = select i1 %not, i32 42, i32 43 + store i32 %sel, ptr %p.gep + %iv.next = add i32 %iv, 1 + %done = icmp eq i32 %iv.next, 1024 + br i1 %done, label %exit, label %loop + +exit: + ret void +} From 2a3d849c18c812b2cf2856b7a42afeaa9322118c Mon Sep 17 00:00:00 2001 From: Luke Lau Date: Mon, 7 Jul 2025 18:06:39 +0800 Subject: [PATCH 2/2] [VPlan] Simplify select !c, x, y -> select c, y, x This is split off from #133993. On its own, this simplification isn't that useful, but it allows us to make the equivalent VPBlendRecipe optimisation more generic 
by operating on VPInstructions. In order to actually test this without #133993, I've also had to extend the m_Not pattern matcher to catch VPWidenRecipes (an xor with all-ones), since I couldn't really think of a straightforward way to create a VPInstruction::Select with a negated condition. --- llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h | 15 +++++++++------ llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp | 9 +++++++++ .../Transforms/LoopVectorize/select-neg-cond.ll | 3 +-- 3 files changed, 19 insertions(+), 8 deletions(-) diff --git a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h index efea99f22d086..d133610ef4f75 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h +++ b/llvm/lib/Transforms/Vectorize/VPlanPatternMatch.h @@ -366,12 +366,6 @@ m_Freeze(const Op0_t &Op0) { return m_VPInstruction(Op0); } -template -inline UnaryVPInstruction_match -m_Not(const Op0_t &Op0) { - return m_VPInstruction(Op0); -} - template inline UnaryVPInstruction_match m_BranchOnCond(const Op0_t &Op0) { @@ -491,6 +485,15 @@ m_Select(const Op0_t &Op0, const Op1_t &Op1, const Op2_t &Op2) { {Op0, Op1, Op2}); } +template +inline match_combine_or, + AllBinaryRecipe_match, Op0_t, + Instruction::Xor, true>> +m_Not(const Op0_t &Op0) { + return m_CombineOr(m_VPInstruction(Op0), + m_c_Binary(m_AllOnes(), Op0)); +} + template inline match_combine_or< BinaryVPInstruction_match, diff --git a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp index 90137b72c83fb..701a6e455d09c 100644 --- a/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp +++ b/llvm/lib/Transforms/Vectorize/VPlanTransforms.cpp @@ -1082,6 +1082,15 @@ static void simplifyRecipe(VPRecipeBase &R, VPTypeAnalysis &TypeInfo) { if (match(Def, m_Select(m_VPValue(), m_VPValue(X), m_Deferred(X)))) return Def->replaceAllUsesWith(X); + // select !c, x, y -> select c, y, x + VPValue *C; + if (match(Def, m_Select(m_Not(m_VPValue(C)), 
m_VPValue(X), m_VPValue(Y)))) { + Def->setOperand(0, C); + Def->setOperand(1, Y); + Def->setOperand(2, X); + return; + } + if (match(Def, m_c_Mul(m_VPValue(A), m_SpecificInt(1)))) return Def->replaceAllUsesWith(A); diff --git a/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll b/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll index 84c0ee877dc31..def239eed33bc 100644 --- a/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll +++ b/llvm/test/Transforms/LoopVectorize/select-neg-cond.ll @@ -13,8 +13,7 @@ define void @neg_cond(ptr noalias %p, ptr noalias %q) { ; CHECK-NEXT: [[TMP0:%.*]] = getelementptr i32, ptr [[P]], i32 [[INDEX]] ; CHECK-NEXT: [[TMP1:%.*]] = getelementptr i32, ptr [[TMP0]], i32 0 ; CHECK-NEXT: [[WIDE_LOAD:%.*]] = load <4 x i32>, ptr [[TMP1]], align 4 -; CHECK-NEXT: [[TMP2:%.*]] = icmp eq <4 x i32> [[WIDE_LOAD]], splat (i32 42) -; CHECK-NEXT: [[TMP3:%.*]] = xor <4 x i1> [[TMP2]], splat (i1 true) +; CHECK-NEXT: [[TMP3:%.*]] = icmp ne <4 x i32> [[WIDE_LOAD]], splat (i32 42) ; CHECK-NEXT: [[TMP4:%.*]] = select <4 x i1> [[TMP3]], <4 x i32> splat (i32 42), <4 x i32> splat (i32 43) ; CHECK-NEXT: store <4 x i32> [[TMP4]], ptr [[TMP1]], align 4 ; CHECK-NEXT: [[INDEX_NEXT]] = add nuw i32 [[INDEX]], 4