diff --git a/llvm/include/llvm/Transforms/IPO/Attributor.h b/llvm/include/llvm/Transforms/IPO/Attributor.h index a013f27766051..8c0342ae5cf12 100644 --- a/llvm/include/llvm/Transforms/IPO/Attributor.h +++ b/llvm/include/llvm/Transforms/IPO/Attributor.h @@ -5339,6 +5339,17 @@ struct AAPotentialConstantValues return nullptr; } + /// Return the minimum trailing zeros of potential constants + unsigned getAssumedMinTrailingZeros() const { + unsigned TrailingZeros = getAssumedSet().begin()->getBitWidth() + 1; + for (const APInt &It : getAssumedSet()) { + if (It.countTrailingZeros() < TrailingZeros) + TrailingZeros = It.countTrailingZeros(); + } + if (TrailingZeros > getAssumedSet().begin()->getBitWidth()) + return 0; + return TrailingZeros; + } /// See AbstractAttribute::getName() StringRef getName() const override { return "AAPotentialConstantValues"; } diff --git a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp index 5ed47aec08b25..a6ac7610a2c7a 100644 --- a/llvm/lib/Transforms/IPO/AttributorAttributes.cpp +++ b/llvm/lib/Transforms/IPO/AttributorAttributes.cpp @@ -5185,6 +5185,7 @@ struct AADereferenceableCallSiteReturned final // ------------------------ Align Argument Attribute ------------------------ namespace { + static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA, Value &AssociatedValue, const Use *U, const Instruction *I, bool &TrackUse) { @@ -5200,6 +5201,28 @@ static unsigned getKnownAlignForUse(Attributor &A, AAAlign &QueryingAA, TrackUse = true; return 0; } + if (const IntrinsicInst *II = dyn_cast(I)) + switch (II->getIntrinsicID()) { + case Intrinsic::ptrmask: { + // Is it appropriate to pull attribute in initialization? + const auto *ConstVals = A.getAAFor( + QueryingAA, IRPosition::value(*II->getOperand(1)), DepClassTy::NONE); + const auto *AlignAA = A.getAAFor( + QueryingAA, IRPosition::value(*II), DepClassTy::NONE); + if (ConstVals && ConstVals->isValidState() && ConstVals->isAtFixpoint()) { + unsigned ShiftValue = std::min(ConstVals->getAssumedMinTrailingZeros(), + Value::MaxAlignmentExponent); + Align ConstAlign(UINT64_C(1) << ShiftValue); + if (ConstAlign >= AlignAA->getKnownAlign()) + return Align(1).value(); + } + if (AlignAA) + return AlignAA->getKnownAlign().value(); + break; + } + default: + break; + } MaybeAlign MA; if (const auto *CB = dyn_cast(I)) { @@ -5499,6 +5522,44 @@ struct AAAlignCallSiteReturned final AAAlignCallSiteReturned(const IRPosition &IRP, Attributor &A) : Base(IRP, A) {} + ChangeStatus updateImpl(Attributor &A) override { + Instruction *I = getIRPosition().getCtxI(); + if (const IntrinsicInst *II = dyn_cast(I)) { + switch (II->getIntrinsicID()) { + case Intrinsic::ptrmask: { + Align Alignment; + bool Valid = false; + + const auto *ConstVals = A.getAAFor( + *this, IRPosition::value(*II->getOperand(1)), DepClassTy::REQUIRED); + if (ConstVals && ConstVals->isValidState()) { + unsigned ShiftValue = + std::min(ConstVals->getAssumedMinTrailingZeros(), + Value::MaxAlignmentExponent); + Alignment = Align(UINT64_C(1) << ShiftValue); + Valid = true; + } + + const auto *AlignAA = + A.getAAFor(*this, IRPosition::value(*(II->getOperand(0))), + DepClassTy::REQUIRED); + if (AlignAA && AlignAA->isValidState()) { + Alignment = std::max(AlignAA->getAssumedAlign(), Alignment); + Valid = true; + } + + if (Valid) + return clampStateAndIndicateChange( + this->getState(), + std::min(this->getAssumedAlign(), Alignment).value()); + break; + } + default: + break; + } + } + return Base::updateImpl(A); + }; /// See AbstractAttribute::trackStatistics() void trackStatistics() const override { STATS_DECLTRACK_CS_ATTR(align); } }; diff --git a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll index 649e9467c0318..fffe50fde1e50 100644 --- a/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll +++ b/llvm/test/Transforms/Attributor/ArgumentPromotion/X86/min-legal-vector-width.ll @@ -9,15 +9,25 @@ target triple = "x86_64-unknown-linux-gnu" ; This should promote define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 { ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { -; CHECK-NEXT: bb: -; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { +; TUNIT-NEXT: bb: +; TUNIT-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; TUNIT-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer512 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0:[0-9]+]] { +; CGSCC-NEXT: bb: +; CGSCC-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; CGSCC-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0:![0-9]+]] +; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT: ret void ; bb: %tmp = load <8 x i64>, ptr %arg1 @@ -66,15 +76,25 @@ bb: ; This should promote define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 { ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { -; CHECK-NEXT: bb: -; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { +; TUNIT-NEXT: bb: +; TUNIT-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; TUNIT-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer256 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1:[0-9]+]] { +; CGSCC-NEXT: bb: +; CGSCC-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; CGSCC-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]] +; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT: ret void ; bb: %tmp = load <8 x i64>, ptr %arg1 @@ -123,15 +143,25 @@ bb: ; This should promote define internal fastcc void @callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 { ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: bb: -; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] { +; TUNIT-NEXT: bb: +; TUNIT-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; TUNIT-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer512_call_avx512_legal512_prefer256 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR1]] { +; CGSCC-NEXT: bb: +; CGSCC-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; CGSCC-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]] +; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT: ret void ; bb: %tmp = load <8 x i64>, ptr %arg1 @@ -180,15 +210,25 @@ bb: ; This should promote define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512(ptr %arg, ptr readonly %arg1) #0 { ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: bb: -; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] { +; TUNIT-NEXT: bb: +; TUNIT-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; TUNIT-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal512_prefer512 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR0]] { +; CGSCC-NEXT: bb: +; CGSCC-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; CGSCC-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]] +; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT: ret void ; bb: %tmp = load <8 x i64>, ptr %arg1 @@ -237,13 +277,21 @@ bb: ; This should not promote define internal fastcc void @callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256(ptr %arg, ptr readonly %arg1) #1 { ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1]] { -; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64 -; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1]] { +; TUNIT-NEXT: bb: +; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64 +; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal256_prefer256_call_avx512_legal512_prefer256 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR1]] { +; CGSCC-NEXT: bb: +; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64, !invariant.load [[META0]] +; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT: ret void ; bb: %tmp = load <8 x i64>, ptr %arg1 @@ -290,13 +338,21 @@ bb: ; This should not promote define internal fastcc void @callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256(ptr %arg, ptr readonly %arg1) #2 { ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR2:[0-9]+]] { -; CHECK-NEXT: bb: -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64 -; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR2]] { +; TUNIT-NEXT: bb: +; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64 +; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx512_legal512_prefer256_call_avx512_legal256_prefer256 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], ptr noalias nofree noundef nonnull readonly align 64 captures(none) dereferenceable(64) [[ARG1:%.*]]) #[[ATTR2]] { +; CGSCC-NEXT: bb: +; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1]], align 64, !invariant.load [[META0]] +; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT: ret void ; bb: %tmp = load <8 x i64>, ptr %arg1 @@ -343,15 +399,25 @@ bb: ; This should promote define internal fastcc void @callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256(ptr %arg, ptr readonly %arg1) #3 { ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] { -; CHECK-NEXT: bb: -; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] { +; TUNIT-NEXT: bb: +; TUNIT-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; TUNIT-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx2_legal256_prefer256_call_avx2_legal512_prefer256 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3:[0-9]+]] { +; CGSCC-NEXT: bb: +; CGSCC-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; CGSCC-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]] +; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT: ret void ; bb: %tmp = load <8 x i64>, ptr %arg1 @@ -400,15 +466,25 @@ bb: ; This should promote define internal fastcc void @callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256(ptr %arg, ptr readonly %arg1) #4 { ; -; CHECK: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable -; CHECK-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 -; CHECK-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] { -; CHECK-NEXT: bb: -; CHECK-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 -; CHECK-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 -; CHECK-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 -; CHECK-NEXT: ret void +; TUNIT: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; TUNIT-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; TUNIT-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] { +; TUNIT-NEXT: bb: +; TUNIT-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; TUNIT-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64 +; TUNIT-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; TUNIT-NEXT: ret void +; +; CGSCC: Function Attrs: inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable +; CGSCC-LABEL: define {{[^@]+}}@callee_avx2_legal512_prefer256_call_avx2_legal256_prefer256 +; CGSCC-SAME: (ptr noalias nofree noundef nonnull writeonly align 64 captures(none) dereferenceable(64) [[ARG:%.*]], <8 x i64> [[TMP0:%.*]]) #[[ATTR3]] { +; CGSCC-NEXT: bb: +; CGSCC-NEXT: [[ARG1_PRIV:%.*]] = alloca <8 x i64>, align 64 +; CGSCC-NEXT: store <8 x i64> [[TMP0]], ptr [[ARG1_PRIV]], align 64 +; CGSCC-NEXT: [[TMP:%.*]] = load <8 x i64>, ptr [[ARG1_PRIV]], align 64, !invariant.load [[META0]] +; CGSCC-NEXT: store <8 x i64> [[TMP]], ptr [[ARG]], align 64 +; CGSCC-NEXT: ret void ; bb: %tmp = load <8 x i64>, ptr %arg1 @@ -464,6 +540,14 @@ attributes #3 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2 attributes #4 = { inlinehint norecurse nounwind uwtable "target-features"="+avx2" "min-legal-vector-width"="256" "prefer-vector-width"="256" } attributes #5 = { argmemonly nounwind } ;. +; CGSCC: attributes #[[ATTR0]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } +; CGSCC: attributes #[[ATTR1]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; CGSCC: attributes #[[ATTR2]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } +; CGSCC: attributes #[[ATTR3]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } +; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } +; CGSCC: attributes #[[ATTR5]] = { nofree willreturn memory(write) } +; CGSCC: attributes #[[ATTR6]] = { nofree nounwind willreturn } +;. ; TUNIT: attributes #[[ATTR0]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } ; TUNIT: attributes #[[ATTR1]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } ; TUNIT: attributes #[[ATTR2]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } @@ -472,11 +556,7 @@ attributes #5 = { argmemonly nounwind } ; TUNIT: attributes #[[ATTR5]] = { nofree willreturn memory(write) } ; TUNIT: attributes #[[ATTR6]] = { nofree nosync nounwind willreturn } ;. -; CGSCC: attributes #[[ATTR0]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="512" "target-features"="+avx512vl" } -; CGSCC: attributes #[[ATTR1]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; CGSCC: attributes #[[ATTR2]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="256" "prefer-vector-width"="256" "target-features"="+avx512vl" } -; CGSCC: attributes #[[ATTR3]] = { inlinehint mustprogress nofree norecurse nosync nounwind willreturn memory(argmem: readwrite) uwtable "min-legal-vector-width"="512" "prefer-vector-width"="256" "target-features"="+avx2" } -; CGSCC: attributes #[[ATTR4:[0-9]+]] = { nocallback nofree nounwind willreturn memory(argmem: write) } -; CGSCC: attributes #[[ATTR5]] = { nofree willreturn memory(write) } -; CGSCC: attributes #[[ATTR6]] = { nofree nounwind willreturn } +; CGSCC: [[META0]] = !{} ;. +;; NOTE: These prefixes are unused and the list is autogenerated. Do not add tests below this line: +; CHECK: {{.*}} diff --git a/llvm/test/Transforms/Attributor/align-ptrmask.ll b/llvm/test/Transforms/Attributor/align-ptrmask.ll new file mode 100644 index 0000000000000..008f5e1b8a46e --- /dev/null +++ b/llvm/test/Transforms/Attributor/align-ptrmask.ll @@ -0,0 +1,206 @@ +; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 +; RUN: opt -passes=attributor -S < %s | FileCheck %s + +define ptr @align_ptrmask_back_no_prop(ptr align 2 %x, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define noundef nonnull align 8 dereferenceable(4) ptr @align_ptrmask_back_no_prop( +; CHECK-SAME: ptr nofree writeonly align 2 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16 +; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR4:[0-9]+]] +; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 8 +; CHECK-NEXT: ret ptr [[P]] +; + %sel = select i1 %cmp1, i64 -32, i64 -8 + %sel1 = select i1 %cmp2, i64 %sel, i64 -16 + %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1) + store float 1.0, ptr %p, align 8 + ret ptr %p +} + +define ptr @align_ptrmask_back_prop(ptr align 2 %x, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define noundef nonnull align 16 dereferenceable(4) ptr @align_ptrmask_back_prop( +; CHECK-SAME: ptr nofree writeonly align 16 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16 +; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 16 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR4]] +; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 16 +; CHECK-NEXT: ret ptr [[P]] +; + %sel = select i1 %cmp1, i64 -32, i64 -8 + %sel1 = select i1 %cmp2, i64 %sel, i64 -16 + %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1) + store float 1.0, ptr %p, align 16 + ret ptr %p +} + +define ptr @align_ptrmask_forward_mask(ptr align 2 %x, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 8 ptr @align_ptrmask_forward_mask( +; CHECK-SAME: ptr nofree readnone align 2 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1:[0-9]+]] { +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16 +; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR4]] +; CHECK-NEXT: ret ptr [[P]] +; + %sel = select i1 %cmp1, i64 -32, i64 -8 + %sel1 = select i1 %cmp2, i64 %sel, i64 -16 + %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1) + ret ptr %p +} + +define ptr @align_ptrmask_forward_ptr(ptr align 16 %x, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 16 ptr @align_ptrmask_forward_ptr( +; CHECK-SAME: ptr nofree readnone align 16 [[X:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 -8 +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16 +; CHECK-NEXT: [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef [[SEL1]]) #[[ATTR4]] +; CHECK-NEXT: ret ptr [[P]] +; + %sel = select i1 %cmp1, i64 -32, i64 -8 + %sel1 = select i1 %cmp2, i64 %sel, i64 -16 + %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1) + ret ptr %p +} + +define ptr @align_ptrmask_forward_nonconst_mask(ptr align 8 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 8 ptr @align_ptrmask_forward_nonconst_mask( +; CHECK-SAME: ptr nofree readnone align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 [[Y]] +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16 +; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 [[SEL1]]) #[[ATTR4]] +; CHECK-NEXT: ret ptr [[P]] +; + %sel = select i1 %cmp1, i64 -32, i64 %y + %sel1 = select i1 %cmp2, i64 %sel, i64 -16 + %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1) + ret ptr %p +} + +define ptr @align_ptrmask_back_nonconst_mask(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define noundef nonnull align 8 dereferenceable(4) ptr @align_ptrmask_back_nonconst_mask( +; CHECK-SAME: ptr nofree writeonly align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[SEL:%.*]] = select i1 [[CMP1]], i64 -32, i64 [[Y]] +; CHECK-NEXT: [[SEL1:%.*]] = select i1 [[CMP2]], i64 [[SEL]], i64 -16 +; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 [[SEL1]]) #[[ATTR4]] +; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 8 +; CHECK-NEXT: ret ptr [[P]] +; + %sel = select i1 %cmp1, i64 -32, i64 %y + %sel1 = select i1 %cmp2, i64 %sel, i64 -16 + %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 %sel1) + store float 1.0, ptr %p, align 8 + ret ptr %p +} + +define ptr @align_ptrmask_back_const_back_noprop(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define noundef nonnull align 8 dereferenceable(4) ptr @align_ptrmask_back_const_back_noprop( +; CHECK-SAME: ptr nofree writeonly align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR4]] +; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 8 +; CHECK-NEXT: ret ptr [[P]] +; + %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -8) + store float 1.0, ptr %p, align 8 + ret ptr %p +} + +define ptr @align_ptrmask_back_const_back_prop(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define noundef nonnull align 8 dereferenceable(4) ptr @align_ptrmask_back_const_back_prop( +; CHECK-SAME: ptr nofree writeonly align 8 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[P:%.*]] = tail call noundef nonnull align 8 dereferenceable(4) ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -2) #[[ATTR4]] +; CHECK-NEXT: store float 1.000000e+00, ptr [[P]], align 8 +; CHECK-NEXT: ret ptr [[P]] +; + %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -2) + store float 1.0, ptr %p, align 8 + ret ptr %p +} + +define ptr @align_ptrmask_back_const_forward_mask(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 8 ptr @align_ptrmask_back_const_forward_mask( +; CHECK-SAME: ptr nofree readnone align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[P:%.*]] = tail call align 8 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR4]] +; CHECK-NEXT: ret ptr [[P]] +; + %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -8) + ret ptr %p +} + +define ptr @align_ptrmask_back_const_forward_ptr(ptr align 16 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 16 ptr @align_ptrmask_back_const_forward_ptr( +; CHECK-SAME: ptr nofree readnone align 16 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8) #[[ATTR4]] +; CHECK-NEXT: ret ptr [[P]] +; + %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -8) + ret ptr %p +} + +; FIXME: The store will create AAAlign for %ptr1, +; but the attribute didn't propagate through extractelement, need propagate +define <2 x ptr> @ptrmask_v2p0_v2i64(<2 x ptr> align 2 %ptr, i64 %a) { +; CHECK-LABEL: define <2 x ptr> @ptrmask_v2p0_v2i64( +; CHECK-SAME: <2 x ptr> align 2 [[PTR:%.*]], i64 [[A:%.*]]) #[[ATTR2:[0-9]+]] { +; CHECK-NEXT: [[RESULT:%.*]] = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> [[PTR]], <2 x i64> noundef splat (i64 -8)) #[[ATTR4]] +; CHECK-NEXT: [[PTR1:%.*]] = extractelement <2 x ptr> [[RESULT]], i32 0 +; CHECK-NEXT: [[PTR2:%.*]] = extractelement <2 x ptr> [[RESULT]], i32 1 +; CHECK-NEXT: store i64 [[A]], ptr [[PTR1]], align 16 +; CHECK-NEXT: store i64 [[A]], ptr [[PTR2]], align 16 +; CHECK-NEXT: ret <2 x ptr> [[RESULT]] +; + %result = call <2 x ptr> @llvm.ptrmask.v2p0.v2i64(<2 x ptr> %ptr, <2 x i64> splat(i64 -8)) + %ptr1 = extractelement <2 x ptr> %result, i32 0 + %ptr2 = extractelement <2 x ptr> %result, i32 1 + store i64 %a, ptr %ptr1, align 16 + store i64 %a, ptr %ptr2, align 16 + ret <2 x ptr> %result +} + +define ptr @align_ptrmask_forward_mask_positive(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 4 ptr @align_ptrmask_forward_mask_positive( +; CHECK-SAME: ptr nofree readnone align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[P:%.*]] = tail call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef 2) #[[ATTR4]] +; CHECK-NEXT: ret ptr [[P]] +; + %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 2) + ret ptr %p +} + +define ptr @align_ptrmask_forward_mask_poison(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 4 ptr @align_ptrmask_forward_mask_poison( +; CHECK-SAME: ptr nofree readnone align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[P:%.*]] = tail call align 4 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 poison) #[[ATTR4]] +; CHECK-NEXT: ret ptr [[P]] +; + %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 poison) + ret ptr %p +} + +define ptr @align_ptrmask_forward_mask_max(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 4294967296 ptr @align_ptrmask_forward_mask_max( +; CHECK-SAME: ptr nofree readnone align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[P:%.*]] = tail call align 4294967296 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -4294967296) #[[ATTR4]] +; CHECK-NEXT: ret ptr [[P]] +; + %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -4294967296) + ret ptr %p +} + +define ptr @align_ptrmask_forward_mask_max_plus_one(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 4294967296 ptr @align_ptrmask_forward_mask_max_plus_one( +; CHECK-SAME: ptr nofree readnone align 4 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[P:%.*]] = tail call align 4294967296 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -8589934592) #[[ATTR4]] +; CHECK-NEXT: ret ptr [[P]] +; + %p = tail call ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -8589934592) + ret ptr %p +} + +define ptr @align_ptrmask_back_callsite(ptr align 4 %x, i64 %y, i1 %cmp1, i1 %cmp2) { +; CHECK-LABEL: define align 16 ptr @align_ptrmask_back_callsite( +; CHECK-SAME: ptr nofree readnone align 16 [[X:%.*]], i64 [[Y:%.*]], i1 [[CMP1:%.*]], i1 [[CMP2:%.*]]) #[[ATTR1]] { +; CHECK-NEXT: [[P:%.*]] = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr [[X]], i64 noundef -4) #[[ATTR4]] +; CHECK-NEXT: ret ptr [[P]] +; + %p = tail call align 16 ptr @llvm.ptrmask.p0.i64(ptr %x, i64 -4) + ret ptr %p +} diff --git a/llvm/test/Transforms/OpenMP/parallel_deletion.ll b/llvm/test/Transforms/OpenMP/parallel_deletion.ll index 67970c41f765e..0b6c4f32772f5 100644 --- a/llvm/test/Transforms/OpenMP/parallel_deletion.ll +++ b/llvm/test/Transforms/OpenMP/parallel_deletion.ll @@ -385,7 +385,7 @@ define internal void @.omp_outlined..4(ptr noalias %.global_tid., ptr noalias %. ; CHECK-LABEL: define {{[^@]+}}@.omp_outlined..4 ; CHECK-SAME: (ptr noalias nofree noundef nonnull readonly align 4 captures(none) dereferenceable(4) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !invariant.load [[META1:![0-9]+]] ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_master(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] @@ -458,7 +458,7 @@ define internal void @.omp_outlined..5(ptr noalias %.global_tid., ptr noalias %. ; CHECK-SAME: (ptr noalias nofree readonly captures(none) [[DOTGLOBAL_TID_:%.*]], ptr noalias nofree readnone captures(none) [[DOTBOUND_TID_:%.*]], ptr nofree noundef nonnull align 4 captures(none) dereferenceable(4) [[A:%.*]]) { ; CHECK-NEXT: entry: ; CHECK-NEXT: [[OMP_GLOBAL_THREAD_NUM:%.*]] = call i32 @__kmpc_global_thread_num(ptr noundef nonnull @[[GLOB0]]) #[[ATTR19]] -; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !invariant.load [[META1]] ; CHECK-NEXT: [[TMP1:%.*]] = call i32 @__kmpc_single(ptr noundef nonnull @[[GLOB0]], i32 [[TMP]]) ; CHECK-NEXT: [[TMP2:%.*]] = icmp eq i32 [[TMP1]], 0 ; CHECK-NEXT: br i1 [[TMP2]], label [[OMP_IF_END:%.*]], label [[OMP_IF_THEN:%.*]] @@ -534,7 +534,7 @@ define internal void @.omp_outlined..6(ptr noalias %.global_tid., ptr noalias %. ; CHECK-NEXT: call void @llvm.lifetime.start.p0(ptr noundef nonnull align 4 [[A1]]) #[[ATTR20:[0-9]+]] ; CHECK-NEXT: store i32 1, ptr [[A1]], align 4 ; CHECK-NEXT: store ptr [[A1]], ptr [[DOTOMP_REDUCTION_RED_LIST]], align 8 -; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load i32, ptr [[DOTGLOBAL_TID_]], align 4, !invariant.load [[META1]] ; CHECK-NEXT: [[TMP4:%.*]] = call i32 @__kmpc_reduce_nowait(ptr noundef nonnull @[[GLOB2:[0-9]+]], i32 [[TMP2]], i32 noundef 1, i64 noundef 8, ptr noundef nonnull align 8 [[DOTOMP_REDUCTION_RED_LIST]], ptr noundef nonnull @.omp.reduction.reduction_func, ptr noundef nonnull @.gomp_critical_user_.reduction.var) ; CHECK-NEXT: switch i32 [[TMP4]], label [[DOTOMP_REDUCTION_DEFAULT:%.*]] [ ; CHECK-NEXT: i32 1, label [[DOTOMP_REDUCTION_CASE1:%.*]] @@ -646,10 +646,10 @@ define internal void @.omp.reduction.reduction_func(ptr %arg, ptr %arg1) { ; CHECK-LABEL: define {{[^@]+}}@.omp.reduction.reduction_func ; CHECK-SAME: (ptr nofree noundef nonnull readonly align 8 captures(none) dereferenceable(8) [[ARG:%.*]], ptr nofree noundef nonnull readonly align 8 captures(none) dereferenceable(8) [[ARG1:%.*]]) #[[ATTR10:[0-9]+]] { ; CHECK-NEXT: entry: -; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARG1]], align 8 -; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARG]], align 8 -; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4 -; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4 +; CHECK-NEXT: [[TMP2:%.*]] = load ptr, ptr [[ARG1]], align 8, !invariant.load [[META1]] +; CHECK-NEXT: [[TMP4:%.*]] = load ptr, ptr [[ARG]], align 8, !invariant.load [[META1]] +; CHECK-NEXT: [[TMP5:%.*]] = load i32, ptr [[TMP4]], align 4, !invariant.load [[META1]] +; CHECK-NEXT: [[TMP6:%.*]] = load i32, ptr [[TMP2]], align 4, !invariant.load [[META1]] ; CHECK-NEXT: [[ADD:%.*]] = add nsw i32 [[TMP5]], [[TMP6]] ; CHECK-NEXT: store i32 [[ADD]], ptr [[TMP4]], align 4 ; CHECK-NEXT: ret void