diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
index 5dd1fe14e5626..e6250ddf2c26b 100644
--- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
+++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp
@@ -224,7 +224,7 @@
 #include "SIDefines.h"
 #include "llvm/ADT/SetOperations.h"
 #include "llvm/ADT/SmallVector.h"
-#include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/InstSimplifyFolder.h"
 #include "llvm/Analysis/Utils/Local.h"
 #include "llvm/CodeGen/TargetPassConfig.h"
 #include "llvm/IR/AttributeMask.h"
@@ -445,7 +445,7 @@ class StoreFatPtrsAsIntsAndExpandMemcpyVisitor
 
   ValueToValueMapTy ConvertedForStore;
 
-  IRBuilder<> IRB;
+  IRBuilder<InstSimplifyFolder> IRB;
 
   const TargetMachine *TM;
 
@@ -459,9 +459,10 @@ class StoreFatPtrsAsIntsAndExpandMemcpyVisitor
 public:
   StoreFatPtrsAsIntsAndExpandMemcpyVisitor(BufferFatPtrToIntTypeMap *TypeMap,
+                                           const DataLayout &DL,
                                            LLVMContext &Ctx,
                                            const TargetMachine *TM)
-      : TypeMap(TypeMap), IRB(Ctx), TM(TM) {}
+      : TypeMap(TypeMap), IRB(Ctx, InstSimplifyFolder(DL)), TM(TM) {}
 
   bool processFunction(Function &F);
 
   bool visitInstruction(Instruction &I) { return false; }
@@ -683,7 +684,7 @@ class LegalizeBufferContentTypesVisitor
     : public InstVisitor<LegalizeBufferContentTypesVisitor, bool> {
   friend class InstVisitor<LegalizeBufferContentTypesVisitor, bool>;
 
-  IRBuilder<> IRB;
+  IRBuilder<InstSimplifyFolder> IRB;
 
   const DataLayout &DL;
 
@@ -743,7 +744,7 @@ class LegalizeBufferContentTypesVisitor
 
 public:
   LegalizeBufferContentTypesVisitor(const DataLayout &DL, LLVMContext &Ctx)
-      : IRB(Ctx), DL(DL) {}
+      : IRB(Ctx, InstSimplifyFolder(DL)), DL(DL) {}
   bool processFunction(Function &F);
 };
 } // namespace
@@ -1326,7 +1327,7 @@ class SplitPtrStructs : public InstVisitor<SplitPtrStructs, PtrParts> {
   const TargetMachine *TM;
   const GCNSubtarget *ST = nullptr;
 
-  IRBuilder<> IRB;
+  IRBuilder<InstSimplifyFolder> IRB;
 
   // Copy metadata between instructions if applicable.
   void copyMetadata(Value *Dest, Value *Src);
@@ -1363,8 +1364,9 @@ class SplitPtrStructs : public InstVisitor<SplitPtrStructs, PtrParts> {
                           bool IsVolatile, SyncScope::ID SSID);
 
 public:
-  SplitPtrStructs(LLVMContext &Ctx, const TargetMachine *TM)
-      : TM(TM), IRB(Ctx) {}
+  SplitPtrStructs(const DataLayout &DL, LLVMContext &Ctx,
+                  const TargetMachine *TM)
+      : TM(TM), IRB(Ctx, InstSimplifyFolder(DL)) {}
 
   void processFunction(Function &F);
 
@@ -1415,7 +1417,7 @@ PtrParts SplitPtrStructs::getPtrParts(Value *V) {
     return {*RsrcEntry = Rsrc, *OffEntry = Off};
   }
 
-  IRBuilder<>::InsertPointGuard Guard(IRB);
+  IRBuilder<InstSimplifyFolder>::InsertPointGuard Guard(IRB);
   if (auto *I = dyn_cast<Instruction>(V)) {
     LLVM_DEBUG(dbgs() << "Recursing to split parts of " << *I << "\n");
     auto [Rsrc, Off] = visit(*I);
@@ -1479,7 +1481,7 @@ void SplitPtrStructs::getPossibleRsrcRoots(Instruction *I,
 }
 
 void SplitPtrStructs::processConditionals() {
-  SmallDenseMap<Instruction *, Value *> FoundRsrcs;
+  SmallDenseMap<Value *, Value *> FoundRsrcs;
   SmallPtrSet<Value *, 4> Roots;
   SmallPtrSet<Value *, 4> Seen;
   for (Instruction *I : Conditionals) {
@@ -1493,7 +1495,7 @@ void SplitPtrStructs::processConditionals() {
     if (MaybeFoundRsrc != FoundRsrcs.end()) {
      MaybeRsrc = MaybeFoundRsrc->second;
    } else {
-      IRBuilder<>::InsertPointGuard Guard(IRB);
+      IRBuilder<InstSimplifyFolder>::InsertPointGuard Guard(IRB);
       Roots.clear();
       Seen.clear();
       getPossibleRsrcRoots(I, Roots, Seen);
@@ -1558,21 +1560,29 @@ void SplitPtrStructs::processConditionals() {
       // to put the corrections maps in an inconsistent state. That'll be handled
      // during the rest of the killing. Also, `ValueToValueMapTy` guarantees
      // that references in that map will be updated as well.
-      ConditionalTemps.push_back(cast<Instruction>(Rsrc));
-      ConditionalTemps.push_back(cast<Instruction>(Off));
-      Rsrc->replaceAllUsesWith(NewRsrc);
-      Off->replaceAllUsesWith(NewOff);
+      // Note that if the temporary instruction got `InstSimplify`'d away, it
+      // might be something like a block argument.
+      if (auto *RsrcInst = dyn_cast<Instruction>(Rsrc)) {
+        ConditionalTemps.push_back(RsrcInst);
+        RsrcInst->replaceAllUsesWith(NewRsrc);
+      }
+      if (auto *OffInst = dyn_cast<Instruction>(Off)) {
+        ConditionalTemps.push_back(OffInst);
+        OffInst->replaceAllUsesWith(NewOff);
+      }
 
       // Save on recomputing the cycle traversals in known-root cases.
       if (MaybeRsrc)
         for (Value *V : Seen)
-          FoundRsrcs[cast<Instruction>(V)] = NewRsrc;
+          FoundRsrcs[V] = NewRsrc;
     } else if (isa<SelectInst>(I)) {
       if (MaybeRsrc) {
-        ConditionalTemps.push_back(cast<Instruction>(Rsrc));
-        Rsrc->replaceAllUsesWith(*MaybeRsrc);
+        if (auto *RsrcInst = dyn_cast<Instruction>(Rsrc)) {
+          ConditionalTemps.push_back(RsrcInst);
+          RsrcInst->replaceAllUsesWith(*MaybeRsrc);
+        }
         for (Value *V : Seen)
-          FoundRsrcs[cast<Instruction>(V)] = *MaybeRsrc;
+          FoundRsrcs[V] = *MaybeRsrc;
       }
     } else {
       llvm_unreachable("Only PHIs and selects go in the conditionals list");
@@ -2426,8 +2436,8 @@ bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) {
                            /*RemoveDeadConstants=*/false, /*IncludeSelf=*/true);
   }
 
-  StoreFatPtrsAsIntsAndExpandMemcpyVisitor MemOpsRewrite(&IntTM, M.getContext(),
-                                                         &TM);
+  StoreFatPtrsAsIntsAndExpandMemcpyVisitor MemOpsRewrite(&IntTM, DL,
+                                                         M.getContext(), &TM);
   LegalizeBufferContentTypesVisitor BufferContentsTypeRewrite(DL,
                                                               M.getContext());
   for (Function &F : M.functions()) {
@@ -2472,7 +2482,7 @@ bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) {
   IntTM.clear();
   CloneMap.clear();
 
-  SplitPtrStructs Splitter(M.getContext(), &TM);
+  SplitPtrStructs Splitter(DL, M.getContext(), &TM);
   for (Function *F : NeedsPostProcess)
     Splitter.processFunction(*F);
   for (Function *F : Intrinsics) {
diff --git a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces-vectors.ll b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces-vectors.ll
index c509cf4b1bf37..2450ca5063de0 100644
--- a/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces-vectors.ll
+++ b/llvm/test/CodeGen/AMDGPU/GlobalISel/irtranslator-non-integral-address-spaces-vectors.ll
@@ -49,8 +49,6 @@ define <2 x ptr addrspace(7)> @gep_vector_splat(<2 x ptr addrspace(7)> %ptrs, i6
   ; CHECK-NEXT:   [[MV2:%[0-9]+]]:_(s64) = G_MERGE_VALUES [[COPY10]](s32), [[COPY11]](s32)
   ; CHECK-NEXT:   [[DEF:%[0-9]+]]:_(<2 x s64>) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0
-  ; CHECK-NEXT:   [[DEF1:%[0-9]+]]:_(<2 x p8>) = G_IMPLICIT_DEF
-  ; CHECK-NEXT:   [[DEF2:%[0-9]+]]:_(<2 x s32>) = G_IMPLICIT_DEF
   ; CHECK-NEXT:   [[IVEC:%[0-9]+]]:_(<2 x s64>) = G_INSERT_VECTOR_ELT [[DEF]], [[MV2]](s64), [[C]](s32)
   ; CHECK-NEXT:   [[SHUF:%[0-9]+]]:_(<2 x s64>) = G_SHUFFLE_VECTOR [[IVEC]](<2 x s64>), [[DEF]], shufflemask(0, 0)
   ; CHECK-NEXT:   [[TRUNC:%[0-9]+]]:_(<2 x s32>) = G_TRUNC [[SHUF]](<2 x s64>)
diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-calls.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-calls.ll
index 022094bc633c8..3765bb0af79ba 100644
--- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-calls.ll
+++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-calls.ll
@@ -48,9 +48,8 @@ define ptr addrspace(7) @recur.inner.2(i32 %v, ptr addrspace(7) %x) {
 ; CHECK-NEXT:    [[X_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[X]], 0
 ; CHECK-NEXT:    [[X_OFF:%.*]] = extractvalue { ptr
addrspace(8), i32 } [[X]], 1 ; CHECK-NEXT: [[INC:%.*]] = add i32 [[X_OFF]], 4 -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[X_RSRC]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[INC]], 1 -; CHECK-NEXT: [[RET:%.*]] = call { ptr addrspace(8), i32 } @recur.inner.1({ ptr addrspace(8), i32 } [[TMP2]], i32 [[V]]) +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } [[X]], i32 [[INC]], 1 +; CHECK-NEXT: [[RET:%.*]] = call { ptr addrspace(8), i32 } @recur.inner.1({ ptr addrspace(8), i32 } [[TMP1]], i32 [[V]]) ; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[RET]] ; %inc = getelementptr i32, ptr addrspace(7) %x, i32 1 @@ -110,9 +109,8 @@ define internal noalias noundef nonnull ptr addrspace(7) @foo(ptr addrspace(7) n ; CHECK-NEXT: [[ARG_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[ARG]], 0 ; CHECK-NEXT: [[ARG_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[ARG]], 1 ; CHECK-NEXT: [[RET:%.*]] = add nuw i32 [[ARG_OFF]], 4 -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[ARG_RSRC]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[RET]], 1 -; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } [[ARG]], i32 [[RET]], 1 +; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP1]] ; %ret = getelementptr inbounds i32, ptr addrspace(7) %arg, i32 1 ret ptr addrspace(7) %ret diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll index d18f0f8bd1ff9..a8e67a4a61816 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll @@ -1898,10 +1898,9 @@ define void @store_v32i6(<6 x i32> %data.abi, ptr addrspace(8) inreg %buf) { ; CHECK-LABEL: define void @store_v32i6( ; CHECK-SAME: <6 x i32> [[DATA_ABI:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[DATA:%.*]] = bitcast <6 x i32> [[DATA_ABI]] to <32 x i6> -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <32 x i6> [[DATA]] to <6 x i32> -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <6 x i32> [[DATA_LEGAL]], <6 x i32> poison, <4 x i32> +; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <6 x i32> [[DATA_ABI]], <6 x i32> poison, <4 x i32> ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DATA_SLICE_0]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = shufflevector <6 x i32> [[DATA_LEGAL]], <6 x i32> poison, <2 x i32> +; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = shufflevector <6 x i32> [[DATA_ABI]], <6 x i32> poison, <2 x i32> ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> [[DATA_SLICE_4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) ; CHECK-NEXT: ret void ; diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll index e6c2d1907068f..ee51b0b84554e 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-mem-transfer.ll @@ -133,8 +133,7 @@ define void @memcpy_known(ptr addrspace(7) inreg %src, ptr addrspace(7) inreg %d ; CHECK-NEXT: [[DOTSLICE_56:%.*]] = shufflevector <64 x i32> [[TMP2]], <64 x i32> 
poison, <4 x i32> ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DOTSLICE_56]], ptr addrspace(8) align 16 [[DST_RSRC]], i32 [[DOTPART_56]], i32 0, i32 0) ; CHECK-NEXT: [[DOTPART_60:%.*]] = add nuw i32 [[TMP3]], 240 -; CHECK-NEXT: [[DOTSLICE_60:%.*]] = shufflevector <64 x i32> [[TMP2]], <64 x i32> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DOTSLICE_60]], ptr addrspace(8) align 16 [[DST_RSRC]], i32 [[DOTPART_60]], i32 0, i32 0) +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DOTOFF_240]], ptr addrspace(8) align 16 [[DST_RSRC]], i32 [[DOTPART_60]], i32 0, i32 0) ; CHECK-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 256 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 8192 ; CHECK-NEXT: br i1 [[TMP5]], label %[[LOAD_STORE_LOOP]], label %[[MEMCPY_SPLIT:.*]] @@ -328,8 +327,7 @@ define void @memcpy_known_i64(ptr addrspace(7) inreg %src, ptr addrspace(7) inre ; CHECK-NEXT: [[DOTSLICE_56:%.*]] = shufflevector <64 x i32> [[TMP2]], <64 x i32> poison, <4 x i32> ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DOTSLICE_56]], ptr addrspace(8) align 1 [[DST_RSRC]], i32 [[DOTPART_56]], i32 0, i32 0) ; CHECK-NEXT: [[DOTPART_60:%.*]] = add nuw i32 [[TMP3]], 240 -; CHECK-NEXT: [[DOTSLICE_60:%.*]] = shufflevector <64 x i32> [[TMP2]], <64 x i32> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DOTSLICE_60]], ptr addrspace(8) align 1 [[DST_RSRC]], i32 [[DOTPART_60]], i32 0, i32 0) +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DOTOFF_240]], ptr addrspace(8) align 1 [[DST_RSRC]], i32 [[DOTPART_60]], i32 0, i32 0) ; CHECK-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 8192 ; CHECK-NEXT: br i1 [[TMP5]], label %[[LOAD_STORE_LOOP]], label %[[MEMCPY_SPLIT:.*]] @@ -792,8 +790,7 @@ define void @memcpy.inline_known(ptr addrspace(7) inreg %src, ptr addrspace(7) i ; CHECK-NEXT: [[DOTSLICE_56:%.*]] = shufflevector <64 x i32> [[TMP2]], <64 x i32> poison, <4 x i32> ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DOTSLICE_56]], ptr addrspace(8) align 16 [[DST_RSRC]], i32 [[DOTPART_56]], i32 0, i32 0) ; CHECK-NEXT: [[DOTPART_60:%.*]] = add nuw i32 [[TMP3]], 240 -; CHECK-NEXT: [[DOTSLICE_60:%.*]] = shufflevector <64 x i32> [[TMP2]], <64 x i32> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DOTSLICE_60]], ptr addrspace(8) align 16 [[DST_RSRC]], i32 [[DOTPART_60]], i32 0, i32 0) +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DOTOFF_240]], ptr addrspace(8) align 16 [[DST_RSRC]], i32 [[DOTPART_60]], i32 0, i32 0) ; CHECK-NEXT: [[TMP4]] = add i32 [[LOOP_INDEX]], 256 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i32 [[TMP4]], 8192 ; CHECK-NEXT: br i1 [[TMP5]], label %[[LOAD_STORE_LOOP]], label %[[MEMCPY_SPLIT:.*]] @@ -987,8 +984,7 @@ define void @memcpy.inline_known_i64(ptr addrspace(7) inreg %src, ptr addrspace( ; CHECK-NEXT: [[DOTSLICE_56:%.*]] = shufflevector <64 x i32> [[TMP2]], <64 x i32> poison, <4 x i32> ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DOTSLICE_56]], ptr addrspace(8) align 1 [[DST_RSRC]], i32 [[DOTPART_56]], i32 0, i32 0) ; CHECK-NEXT: [[DOTPART_60:%.*]] = add nuw i32 [[TMP3]], 240 -; CHECK-NEXT: [[DOTSLICE_60:%.*]] = shufflevector <64 x i32> [[TMP2]], <64 x i32> poison, <4 x i32> -; CHECK-NEXT: call void 
@llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DOTSLICE_60]], ptr addrspace(8) align 1 [[DST_RSRC]], i32 [[DOTPART_60]], i32 0, i32 0) +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DOTOFF_240]], ptr addrspace(8) align 1 [[DST_RSRC]], i32 [[DOTPART_60]], i32 0, i32 0) ; CHECK-NEXT: [[TMP4]] = add i64 [[LOOP_INDEX]], 256 ; CHECK-NEXT: [[TMP5:%.*]] = icmp ult i64 [[TMP4]], 8192 ; CHECK-NEXT: br i1 [[TMP5]], label %[[LOAD_STORE_LOOP]], label %[[MEMCPY_SPLIT:.*]] diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-p7-in-memory.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-p7-in-memory.ll index 9b2e2f950a39d..a8473927e1bd8 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-p7-in-memory.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-p7-in-memory.ll @@ -14,11 +14,7 @@ define void @scalar_copy(ptr %a, ptr %b) { ; CHECK-NEXT: [[X_PTR_RSRC:%.*]] = inttoptr i128 [[TMP2]] to ptr addrspace(8) ; CHECK-NEXT: [[X_PTR_OFF:%.*]] = trunc i160 [[X]] to i32 ; CHECK-NEXT: [[B1:%.*]] = getelementptr i160, ptr [[B]], i64 1 -; CHECK-NEXT: [[X_PTR_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[X_PTR_RSRC]] to i160 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i160 [[X_PTR_INT_RSRC]], 32 -; CHECK-NEXT: [[X_PTR_INT_OFF:%.*]] = zext i32 [[X_PTR_OFF]] to i160 -; CHECK-NEXT: [[X_PTR_INT:%.*]] = or i160 [[TMP3]], [[X_PTR_INT_OFF]] -; CHECK-NEXT: store i160 [[X_PTR_INT]], ptr [[B1]], align 32 +; CHECK-NEXT: store i160 [[X]], ptr [[B1]], align 32 ; CHECK-NEXT: ret void ; %x = load ptr addrspace(7), ptr %a @@ -36,11 +32,7 @@ define void @vector_copy(ptr %a, ptr %b) { ; CHECK-NEXT: [[X_PTR_RSRC:%.*]] = inttoptr <4 x i128> [[TMP2]] to <4 x ptr addrspace(8)> ; CHECK-NEXT: [[X_PTR_OFF:%.*]] = trunc <4 x i160> [[X]] to <4 x i32> ; CHECK-NEXT: [[B1:%.*]] = getelementptr <4 x i160>, ptr [[B]], i64 2 -; CHECK-NEXT: [[X_PTR_INT_RSRC:%.*]] = ptrtoint <4 x ptr addrspace(8)> [[X_PTR_RSRC]] to <4 x i160> -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw <4 x i160> [[X_PTR_INT_RSRC]], splat (i160 32) -; CHECK-NEXT: [[X_PTR_INT_OFF:%.*]] = zext <4 x i32> [[X_PTR_OFF]] to <4 x i160> -; CHECK-NEXT: [[X_PTR_INT:%.*]] = or <4 x i160> [[TMP3]], [[X_PTR_INT_OFF]] -; CHECK-NEXT: store <4 x i160> [[X_PTR_INT]], ptr [[B1]], align 128 +; CHECK-NEXT: store <4 x i160> [[X]], ptr [[B1]], align 128 ; CHECK-NEXT: ret void ; %x = load <4 x ptr addrspace(7)>, ptr %a @@ -59,21 +51,13 @@ define void @alloca(ptr %a, ptr %b) { ; CHECK-NEXT: [[X_PTR_RSRC:%.*]] = inttoptr i128 [[TMP2]] to ptr addrspace(8) ; CHECK-NEXT: [[X_PTR_OFF:%.*]] = trunc i160 [[X]] to i32 ; CHECK-NEXT: [[L:%.*]] = getelementptr i160, ptr addrspace(5) [[ALLOCA]], i32 1 -; CHECK-NEXT: [[X_PTR_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[X_PTR_RSRC]] to i160 -; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i160 [[X_PTR_INT_RSRC]], 32 -; CHECK-NEXT: [[X_PTR_INT_OFF:%.*]] = zext i32 [[X_PTR_OFF]] to i160 -; CHECK-NEXT: [[X_PTR_INT:%.*]] = or i160 [[TMP3]], [[X_PTR_INT_OFF]] -; CHECK-NEXT: store i160 [[X_PTR_INT]], ptr addrspace(5) [[L]], align 32 +; CHECK-NEXT: store i160 [[X]], ptr addrspace(5) [[L]], align 32 ; CHECK-NEXT: [[Y:%.*]] = load i160, ptr addrspace(5) [[L]], align 32 -; CHECK-NEXT: [[TMP4:%.*]] = lshr i160 [[Y]], 32 -; CHECK-NEXT: [[TMP5:%.*]] = trunc i160 [[TMP4]] to i128 -; CHECK-NEXT: [[Y_PTR_RSRC:%.*]] = inttoptr i128 [[TMP5]] to ptr addrspace(8) +; CHECK-NEXT: [[TMP3:%.*]] = lshr i160 [[Y]], 32 +; CHECK-NEXT: [[TMP4:%.*]] = trunc i160 [[TMP3]] to i128 +; CHECK-NEXT: [[Y_PTR_RSRC:%.*]] = inttoptr i128 [[TMP4]] to ptr 
addrspace(8) ; CHECK-NEXT: [[Y_PTR_OFF:%.*]] = trunc i160 [[Y]] to i32 -; CHECK-NEXT: [[Y_PTR_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[Y_PTR_RSRC]] to i160 -; CHECK-NEXT: [[TMP6:%.*]] = shl nuw i160 [[Y_PTR_INT_RSRC]], 32 -; CHECK-NEXT: [[Y_PTR_INT_OFF:%.*]] = zext i32 [[Y_PTR_OFF]] to i160 -; CHECK-NEXT: [[Y_PTR_INT:%.*]] = or i160 [[TMP6]], [[Y_PTR_INT_OFF]] -; CHECK-NEXT: store i160 [[Y_PTR_INT]], ptr [[B]], align 32 +; CHECK-NEXT: store i160 [[Y]], ptr [[B]], align 32 ; CHECK-NEXT: ret void ; %alloca = alloca [5 x ptr addrspace(7)], addrspace(5) @@ -117,35 +101,7 @@ define void @complex_copy(ptr %a, ptr %b) { ; CHECK-NEXT: [[TMP18:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[X_2_PTR_RSRC]], 0 ; CHECK-NEXT: [[X_2_PTR:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP18]], i32 [[X_2_PTR_OFF]], 1 ; CHECK-NEXT: [[TMP19:%.*]] = insertvalue { [2 x { ptr addrspace(8), i32 }], i32, { ptr addrspace(8), i32 } } [[TMP14]], { ptr addrspace(8), i32 } [[X_2_PTR]], 2 -; CHECK-NEXT: [[TMP20:%.*]] = extractvalue { [2 x { ptr addrspace(8), i32 }], i32, { ptr addrspace(8), i32 } } [[TMP19]], 0 -; CHECK-NEXT: [[TMP21:%.*]] = extractvalue [2 x { ptr addrspace(8), i32 }] [[TMP20]], 0 -; CHECK-NEXT: [[DOTRSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[TMP21]], 0 -; CHECK-NEXT: [[DOTOFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[TMP21]], 1 -; CHECK-NEXT: [[DOT0_0_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[DOTRSRC]] to i160 -; CHECK-NEXT: [[TMP22:%.*]] = shl nuw i160 [[DOT0_0_INT_RSRC]], 32 -; CHECK-NEXT: [[DOT0_0_INT_OFF:%.*]] = zext i32 [[DOTOFF]] to i160 -; CHECK-NEXT: [[DOT0_0_INT:%.*]] = or i160 [[TMP22]], [[DOT0_0_INT_OFF]] -; CHECK-NEXT: [[TMP23:%.*]] = insertvalue [2 x i160] poison, i160 [[DOT0_0_INT]], 0 -; CHECK-NEXT: [[TMP24:%.*]] = extractvalue [2 x { ptr addrspace(8), i32 }] [[TMP20]], 1 -; CHECK-NEXT: [[DOTRSRC1:%.*]] = extractvalue { ptr addrspace(8), i32 } [[TMP24]], 0 -; CHECK-NEXT: [[DOTOFF2:%.*]] = extractvalue { ptr addrspace(8), i32 } [[TMP24]], 1 -; CHECK-NEXT: [[DOT0_1_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[DOTRSRC1]] to i160 -; CHECK-NEXT: [[TMP25:%.*]] = shl nuw i160 [[DOT0_1_INT_RSRC]], 32 -; CHECK-NEXT: [[DOT0_1_INT_OFF:%.*]] = zext i32 [[DOTOFF2]] to i160 -; CHECK-NEXT: [[DOT0_1_INT:%.*]] = or i160 [[TMP25]], [[DOT0_1_INT_OFF]] -; CHECK-NEXT: [[TMP26:%.*]] = insertvalue [2 x i160] [[TMP23]], i160 [[DOT0_1_INT]], 1 -; CHECK-NEXT: [[TMP27:%.*]] = insertvalue { [2 x i160], i32, i160 } poison, [2 x i160] [[TMP26]], 0 -; CHECK-NEXT: [[TMP28:%.*]] = extractvalue { [2 x { ptr addrspace(8), i32 }], i32, { ptr addrspace(8), i32 } } [[TMP19]], 1 -; CHECK-NEXT: [[TMP29:%.*]] = insertvalue { [2 x i160], i32, i160 } [[TMP27]], i32 [[TMP28]], 1 -; CHECK-NEXT: [[TMP30:%.*]] = extractvalue { [2 x { ptr addrspace(8), i32 }], i32, { ptr addrspace(8), i32 } } [[TMP19]], 2 -; CHECK-NEXT: [[DOTRSRC3:%.*]] = extractvalue { ptr addrspace(8), i32 } [[TMP30]], 0 -; CHECK-NEXT: [[DOTOFF4:%.*]] = extractvalue { ptr addrspace(8), i32 } [[TMP30]], 1 -; CHECK-NEXT: [[DOT2_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[DOTRSRC3]] to i160 -; CHECK-NEXT: [[TMP31:%.*]] = shl nuw i160 [[DOT2_INT_RSRC]], 32 -; CHECK-NEXT: [[DOT2_INT_OFF:%.*]] = zext i32 [[DOTOFF4]] to i160 -; CHECK-NEXT: [[DOT2_INT:%.*]] = or i160 [[TMP31]], [[DOT2_INT_OFF]] -; CHECK-NEXT: [[TMP32:%.*]] = insertvalue { [2 x i160], i32, i160 } [[TMP29]], i160 [[DOT2_INT]], 2 -; CHECK-NEXT: store { [2 x i160], i32, i160 } [[TMP32]], ptr [[B]], align 32 +; CHECK-NEXT: store { [2 x i160], i32, i160 } 
[[X]], ptr [[B]], align 32 ; CHECK-NEXT: ret void ; %x = load {[2 x ptr addrspace(7)], i32, ptr addrspace(7)}, ptr %a diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll index ea4117b418959..b0658031356bb 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-pointer-ops.ll @@ -14,9 +14,8 @@ define ptr addrspace(7) @gep(ptr addrspace(7) %in, i32 %idx) { ; CHECK-NEXT: [[RET_OFFS:%.*]] = add nsw i32 [[RET_IDX]], 8 ; CHECK-NEXT: [[RET_OFFS1:%.*]] = add nsw i32 [[RET_OFFS]], 24 ; CHECK-NEXT: [[RET:%.*]] = add i32 [[IN_OFF]], [[RET_OFFS1]] -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[IN_RSRC]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[RET]], 1 -; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } [[IN]], i32 [[RET]], 1 +; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP1]] ; %ret = getelementptr inbounds {i32, [4 x ptr]}, ptr addrspace(7) %in, i32 %idx, i32 1, i32 3 ret ptr addrspace(7) %ret @@ -31,9 +30,8 @@ define <2 x ptr addrspace(7)> @gep_vectors(<2 x ptr addrspace(7)> %in, <2 x i32> ; CHECK-NEXT: [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], splat (i32 8) ; CHECK-NEXT: [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], splat (i32 24) ; CHECK-NEXT: [[RET:%.*]] = add <2 x i32> [[IN_OFF]], [[RET_OFFS1]] -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]], <2 x i32> [[RET]], 1 -; CHECK-NEXT: ret { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[IN]], <2 x i32> [[RET]], 1 +; CHECK-NEXT: ret { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]] ; %ret = getelementptr inbounds {i32, [4 x ptr]}, <2 x ptr addrspace(7)> %in, <2 x i32> %idx, i32 1, i32 3 ret <2 x ptr addrspace(7)> %ret @@ -51,9 +49,8 @@ define <2 x ptr addrspace(7)> @gep_vector_scalar(<2 x ptr addrspace(7)> %in, i64 ; CHECK-NEXT: [[RET_OFFS:%.*]] = add nsw <2 x i32> [[RET_IDX]], splat (i32 8) ; CHECK-NEXT: [[RET_OFFS1:%.*]] = add nsw <2 x i32> [[RET_OFFS]], splat (i32 24) ; CHECK-NEXT: [[RET:%.*]] = add <2 x i32> [[IN_OFF]], [[RET_OFFS1]] -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } poison, <2 x ptr addrspace(8)> [[IN_RSRC]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]], <2 x i32> [[RET]], 1 -; CHECK-NEXT: ret { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { <2 x ptr addrspace(8)>, <2 x i32> } [[IN]], <2 x i32> [[RET]], 1 +; CHECK-NEXT: ret { <2 x ptr addrspace(8)>, <2 x i32> } [[TMP1]] ; %ret = getelementptr inbounds {i32, [4 x ptr]}, <2 x ptr addrspace(7)> %in, i64 %idx, i32 1, i32 3 ret <2 x ptr addrspace(7)> %ret @@ -84,9 +81,8 @@ define ptr addrspace(7) @simple_gep(ptr addrspace(7) %ptr, i32 %off) { ; CHECK-NEXT: [[PTR_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 1 ; CHECK-NEXT: [[RET_IDX:%.*]] = mul i32 [[OFF]], 4 ; CHECK-NEXT: [[RET:%.*]] = add i32 [[PTR_OFF]], [[RET_IDX]] -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[PTR_RSRC]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { 
ptr addrspace(8), i32 } [[TMP1]], i32 [[RET]], 1 -; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } [[PTR]], i32 [[RET]], 1 +; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP1]] ; %ret = getelementptr i32, ptr addrspace(7) %ptr, i32 %off ret ptr addrspace(7) %ret @@ -99,9 +95,8 @@ define ptr addrspace(7) @simple_inbounds_gep(ptr addrspace(7) %ptr, i32 %off) { ; CHECK-NEXT: [[PTR_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 1 ; CHECK-NEXT: [[RET_IDX:%.*]] = mul nsw i32 [[OFF]], 4 ; CHECK-NEXT: [[RET:%.*]] = add i32 [[PTR_OFF]], [[RET_IDX]] -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[PTR_RSRC]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[RET]], 1 -; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } [[PTR]], i32 [[RET]], 1 +; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP1]] ; %ret = getelementptr inbounds i32, ptr addrspace(7) %ptr, i32 %off ret ptr addrspace(7) %ret @@ -114,9 +109,8 @@ define ptr addrspace(7) @simple_nuw_gep(ptr addrspace(7) %ptr, i32 %off) { ; CHECK-NEXT: [[PTR_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 1 ; CHECK-NEXT: [[RET_IDX:%.*]] = mul nuw i32 [[OFF]], 4 ; CHECK-NEXT: [[RET:%.*]] = add nuw i32 [[PTR_OFF]], [[RET_IDX]] -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[PTR_RSRC]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[RET]], 1 -; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } [[PTR]], i32 [[RET]], 1 +; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP1]] ; %ret = getelementptr nuw i32, ptr addrspace(7) %ptr, i32 %off ret ptr addrspace(7) %ret @@ -129,9 +123,8 @@ define ptr addrspace(7) @simple_nusw_gep(ptr addrspace(7) %ptr, i32 %off) { ; CHECK-NEXT: [[PTR_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 1 ; CHECK-NEXT: [[RET_IDX:%.*]] = mul nsw i32 [[OFF]], 4 ; CHECK-NEXT: [[RET:%.*]] = add i32 [[PTR_OFF]], [[RET_IDX]] -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[PTR_RSRC]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[RET]], 1 -; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } [[PTR]], i32 [[RET]], 1 +; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP1]] ; %ret = getelementptr nusw i32, ptr addrspace(7) %ptr, i32 %off ret ptr addrspace(7) %ret @@ -145,9 +138,8 @@ define ptr addrspace(7) @nusw_gep_pair(ptr addrspace(7) %ptr, i32 %off) { ; CHECK-NEXT: [[P1_IDX:%.*]] = mul nsw i32 [[OFF]], 4 ; CHECK-NEXT: [[P1:%.*]] = add i32 [[PTR_OFF]], [[P1_IDX]] ; CHECK-NEXT: [[RET:%.*]] = add nuw i32 [[P1]], 16 -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[PTR_RSRC]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[RET]], 1 -; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } [[PTR]], i32 [[RET]], 1 +; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP1]] ; %p1 = getelementptr nusw i32, ptr addrspace(7) %ptr, i32 %off %ret = getelementptr nusw i32, ptr addrspace(7) %p1, i32 4 @@ -156,11 +148,9 @@ define ptr addrspace(7) @nusw_gep_pair(ptr 
addrspace(7) %ptr, i32 %off) { define ptr addrspace(7) @zero_gep(ptr addrspace(7) %ptr) { ; CHECK-LABEL: define { ptr addrspace(8), i32 } @zero_gep -; CHECK-SAME: ({ ptr addrspace(8), i32 } [[PTR:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[PTR_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 0 -; CHECK-NEXT: [[PTR_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[PTR]], 1 -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[PTR_RSRC]], 0 -; CHECK-NEXT: [[RET:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[PTR_OFF]], 1 +; CHECK-SAME: ({ ptr addrspace(8), i32 } [[RET:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[PTR_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[RET]], 0 +; CHECK-NEXT: [[PTR_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[RET]], 1 ; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[RET]] ; %ret = getelementptr i8, ptr addrspace(7) %ptr, i32 0 @@ -173,8 +163,7 @@ define ptr addrspace(7) @zero_gep_goes_second(ptr addrspace(7) %v0, i32 %arg) { ; CHECK-NEXT: [[V0_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[V0]], 0 ; CHECK-NEXT: [[V0_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[V0]], 1 ; CHECK-NEXT: [[V1:%.*]] = add i32 [[V0_OFF]], [[ARG]] -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[V0_RSRC]], 0 -; CHECK-NEXT: [[V2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[V1]], 1 +; CHECK-NEXT: [[V2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[V0]], i32 [[V1]], 1 ; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[V2]] ; %v1 = getelementptr i8, ptr addrspace(7) %v0, i32 %arg @@ -188,9 +177,8 @@ define ptr addrspace(7) @zero_gep_goes_first(ptr addrspace(7) %v0, i32 %arg) { ; CHECK-NEXT: [[V0_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[V0]], 0 ; CHECK-NEXT: [[V0_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[V0]], 1 ; CHECK-NEXT: [[V2:%.*]] = add i32 [[V0_OFF]], [[ARG]] -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[V0_RSRC]], 0 -; CHECK-NEXT: [[TMP2:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[V2]], 1 -; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP2]] +; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } [[V0]], i32 [[V2]], 1 +; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[TMP1]] ; %v1 = getelementptr i8, ptr addrspace(7) %v0, i32 0 %v2 = getelementptr i8, ptr addrspace(7) %v1, i32 %arg @@ -316,11 +304,7 @@ define ptr addrspace(7) @inttoptr_long(i256 %v) { define ptr addrspace(7) @inttoptr_offset(i32 %v) { ; CHECK-LABEL: define { ptr addrspace(8), i32 } @inttoptr_offset ; CHECK-SAME: (i32 [[V:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[TMP1:%.*]] = lshr i32 [[V]], 32 -; CHECK-NEXT: [[TMP2:%.*]] = zext i32 [[TMP1]] to i128 -; CHECK-NEXT: [[RET_RSRC:%.*]] = inttoptr i128 [[TMP2]] to ptr addrspace(8) -; CHECK-NEXT: [[TMP3:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[RET_RSRC]], 0 -; CHECK-NEXT: [[RET:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP3]], i32 [[V]], 1 +; CHECK-NEXT: [[RET:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, i32 [[V]], 1 ; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[RET]] ; %ret = inttoptr i32 %v to ptr addrspace(7) @@ -514,8 +498,7 @@ define ptr addrspace(7) @ptrmask(ptr addrspace(7) %p, i32 %mask) { ; CHECK-NEXT: [[P_RSRC:%.*]] = extractvalue { ptr addrspace(8), i32 } [[P]], 0 ; CHECK-NEXT: [[P_OFF:%.*]] = extractvalue { ptr addrspace(8), i32 } [[P]], 1 ; CHECK-NEXT: [[RET_OFF:%.*]] = and i32 [[P_OFF]], 
[[MASK]] -; CHECK-NEXT: [[TMP1:%.*]] = insertvalue { ptr addrspace(8), i32 } poison, ptr addrspace(8) [[P_RSRC]], 0 -; CHECK-NEXT: [[RET:%.*]] = insertvalue { ptr addrspace(8), i32 } [[TMP1]], i32 [[RET_OFF]], 1 +; CHECK-NEXT: [[RET:%.*]] = insertvalue { ptr addrspace(8), i32 } [[P]], i32 [[RET_OFF]], 1 ; CHECK-NEXT: ret { ptr addrspace(8), i32 } [[RET]] ; %ret = call ptr addrspace(7) @llvm.ptrmask.p7.i32(ptr addrspace(7) %p, i32 %mask) diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll index 95679a593259a..ef3026356f5fe 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll @@ -14,52 +14,46 @@ define float @debug_stash_pointer(ptr addrspace(8) %buf, i32 %idx, ptr addrspace ; CHECK-NEXT: #dbg_value(ptr addrspace(5) [[AUX_PTR_VAR]], [[META12:![0-9]+]], !DIExpression(), [[DBG22]]) ; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } poison, [[META13:![0-9]+]], !DIExpression(), [[META23:![0-9]+]]) ; CHECK-NEXT: [[BUF_PTR_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[BUF]] to i160, !dbg [[DBG24:![0-9]+]] -; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i160 [[BUF_PTR_INT_RSRC]], 32, !dbg [[DBG24]] -; CHECK-NEXT: [[BUF_PTR_INT:%.*]] = or i160 [[TMP1]], 0, !dbg [[DBG24]] +; CHECK-NEXT: [[BUF_PTR_INT:%.*]] = shl nuw i160 [[BUF_PTR_INT_RSRC]], 32, !dbg [[DBG24]] ; CHECK-NEXT: store i160 [[BUF_PTR_INT]], ptr addrspace(5) [[BUF_PTR_VAR]], align 32, !dbg [[DBG24]] ; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } poison, [[META15:![0-9]+]], !DIExpression(), [[META25:![0-9]+]]) ; CHECK-NEXT: [[AUX_PTR_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[AUX]] to i160, !dbg [[DBG26:![0-9]+]] -; CHECK-NEXT: [[TMP2:%.*]] = shl nuw i160 [[AUX_PTR_INT_RSRC]], 32, !dbg [[DBG26]] -; CHECK-NEXT: [[AUX_PTR_INT:%.*]] = or i160 [[TMP2]], 0, !dbg [[DBG26]] +; CHECK-NEXT: [[AUX_PTR_INT:%.*]] = shl nuw i160 [[AUX_PTR_INT_RSRC]], 32, !dbg [[DBG26]] ; CHECK-NEXT: store i160 [[AUX_PTR_INT]], ptr addrspace(5) [[AUX_PTR_VAR]], align 32, !dbg [[DBG26]] ; CHECK-NEXT: [[BUF_PTR_2:%.*]] = load i160, ptr addrspace(5) [[BUF_PTR_VAR]], align 32, !dbg [[DBG27:![0-9]+]] -; CHECK-NEXT: [[TMP3:%.*]] = lshr i160 [[BUF_PTR_2]], 32, !dbg [[DBG27]] -; CHECK-NEXT: [[TMP4:%.*]] = trunc i160 [[TMP3]] to i128, !dbg [[DBG27]] -; CHECK-NEXT: [[BUF_PTR_2_PTR_RSRC:%.*]] = inttoptr i128 [[TMP4]] to ptr addrspace(8), !dbg [[DBG27]] +; CHECK-NEXT: [[TMP1:%.*]] = lshr i160 [[BUF_PTR_2]], 32, !dbg [[DBG27]] +; CHECK-NEXT: [[TMP2:%.*]] = trunc i160 [[TMP1]] to i128, !dbg [[DBG27]] +; CHECK-NEXT: [[BUF_PTR_2_PTR_RSRC:%.*]] = inttoptr i128 [[TMP2]] to ptr addrspace(8), !dbg [[DBG27]] ; CHECK-NEXT: [[BUF_PTR_2_PTR_OFF:%.*]] = trunc i160 [[BUF_PTR_2]] to i32, !dbg [[DBG27]] ; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } poison, [[META16:![0-9]+]], !DIExpression(), [[DBG27]]) ; CHECK-NEXT: [[BUF_PTR_3_IDX:%.*]] = mul i32 [[IDX]], 4, !dbg [[DBG28:![0-9]+]] ; CHECK-NEXT: [[BUF_PTR_3:%.*]] = add i32 [[BUF_PTR_2_PTR_OFF]], [[BUF_PTR_3_IDX]], !dbg [[DBG28]] ; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } poison, [[META17:![0-9]+]], !DIExpression(), [[DBG28]]) ; CHECK-NEXT: [[BUF_PTR_3_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[BUF_PTR_2_PTR_RSRC]] to i160, !dbg [[DBG29:![0-9]+]] -; CHECK-NEXT: [[TMP5:%.*]] = shl nuw i160 [[BUF_PTR_3_INT_RSRC]], 32, !dbg [[DBG29]] +; CHECK-NEXT: [[TMP3:%.*]] = shl nuw i160 [[BUF_PTR_3_INT_RSRC]], 32, !dbg 
[[DBG29]] ; CHECK-NEXT: [[BUF_PTR_3_INT_OFF:%.*]] = zext i32 [[BUF_PTR_3]] to i160, !dbg [[DBG29]] -; CHECK-NEXT: [[BUF_PTR_3_INT:%.*]] = or i160 [[TMP5]], [[BUF_PTR_3_INT_OFF]], !dbg [[DBG29]] +; CHECK-NEXT: [[BUF_PTR_3_INT:%.*]] = or i160 [[TMP3]], [[BUF_PTR_3_INT_OFF]], !dbg [[DBG29]] ; CHECK-NEXT: store i160 [[BUF_PTR_3_INT]], ptr addrspace(5) [[BUF_PTR_VAR]], align 32, !dbg [[DBG29]] ; CHECK-NEXT: [[BUF_PTR_4:%.*]] = load i160, ptr addrspace(5) [[BUF_PTR_VAR]], align 32, !dbg [[DBG30:![0-9]+]] -; CHECK-NEXT: [[TMP6:%.*]] = lshr i160 [[BUF_PTR_4]], 32, !dbg [[DBG30]] -; CHECK-NEXT: [[TMP7:%.*]] = trunc i160 [[TMP6]] to i128, !dbg [[DBG30]] -; CHECK-NEXT: [[BUF_PTR_4_PTR_RSRC:%.*]] = inttoptr i128 [[TMP7]] to ptr addrspace(8), !dbg [[DBG30]] +; CHECK-NEXT: [[TMP4:%.*]] = lshr i160 [[BUF_PTR_4]], 32, !dbg [[DBG30]] +; CHECK-NEXT: [[TMP5:%.*]] = trunc i160 [[TMP4]] to i128, !dbg [[DBG30]] +; CHECK-NEXT: [[BUF_PTR_4_PTR_RSRC:%.*]] = inttoptr i128 [[TMP5]] to ptr addrspace(8), !dbg [[DBG30]] ; CHECK-NEXT: [[BUF_PTR_4_PTR_OFF:%.*]] = trunc i160 [[BUF_PTR_4]] to i32, !dbg [[DBG30]] ; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } poison, [[META18:![0-9]+]], !DIExpression(), [[DBG30]]) ; CHECK-NEXT: [[RET:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF_PTR_4_PTR_RSRC]], i32 [[BUF_PTR_4_PTR_OFF]], i32 0, i32 0), !dbg [[DBG31:![0-9]+]] ; CHECK-NEXT: #dbg_value(float [[RET]], [[META19:![0-9]+]], !DIExpression(), [[DBG31]]) ; CHECK-NEXT: [[AUX_PTR_2:%.*]] = load i160, ptr addrspace(5) [[AUX_PTR_VAR]], align 32, !dbg [[DBG32:![0-9]+]] -; CHECK-NEXT: [[TMP8:%.*]] = lshr i160 [[AUX_PTR_2]], 32, !dbg [[DBG32]] -; CHECK-NEXT: [[TMP9:%.*]] = trunc i160 [[TMP8]] to i128, !dbg [[DBG32]] -; CHECK-NEXT: [[AUX_PTR_2_PTR_RSRC:%.*]] = inttoptr i128 [[TMP9]] to ptr addrspace(8), !dbg [[DBG32]] +; CHECK-NEXT: [[TMP6:%.*]] = lshr i160 [[AUX_PTR_2]], 32, !dbg [[DBG32]] +; CHECK-NEXT: [[TMP7:%.*]] = trunc i160 [[TMP6]] to i128, !dbg [[DBG32]] +; CHECK-NEXT: [[AUX_PTR_2_PTR_RSRC:%.*]] = inttoptr i128 [[TMP7]] to ptr addrspace(8), !dbg [[DBG32]] ; CHECK-NEXT: [[AUX_PTR_2_PTR_OFF:%.*]] = trunc i160 [[AUX_PTR_2]] to i32, !dbg [[DBG32]] ; CHECK-NEXT: #dbg_value({ ptr addrspace(8), i32 } poison, [[META20:![0-9]+]], !DIExpression(), [[DBG32]]) -; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_RSRC:%.*]] = ptrtoint ptr addrspace(8) [[BUF_PTR_4_PTR_RSRC]] to i160, !dbg [[DBG33:![0-9]+]] -; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i160 [[BUF_PTR_4_PTR_INT_RSRC]], 32, !dbg [[DBG33]] -; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_OFF:%.*]] = zext i32 [[BUF_PTR_4_PTR_OFF]] to i160, !dbg [[DBG33]] -; CHECK-NEXT: [[BUF_PTR_4_PTR_INT:%.*]] = or i160 [[TMP10]], [[BUF_PTR_4_PTR_INT_OFF]], !dbg [[DBG33]] -; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_LEGAL:%.*]] = bitcast i160 [[BUF_PTR_4_PTR_INT]] to <5 x i32>, !dbg [[DBG33]] -; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_SLICE_0:%.*]] = shufflevector <5 x i32> [[BUF_PTR_4_PTR_INT_LEGAL]], <5 x i32> poison, <4 x i32> , !dbg [[DBG33]] -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[BUF_PTR_4_PTR_INT_SLICE_0]], ptr addrspace(8) align 32 [[AUX_PTR_2_PTR_RSRC]], i32 [[AUX_PTR_2_PTR_OFF]], i32 0, i32 0), !dbg [[DBG33]] +; CHECK-NEXT: [[BUF_PTR_4_LEGAL:%.*]] = bitcast i160 [[BUF_PTR_4]] to <5 x i32>, !dbg [[DBG33:![0-9]+]] +; CHECK-NEXT: [[BUF_PTR_4_SLICE_0:%.*]] = shufflevector <5 x i32> [[BUF_PTR_4_LEGAL]], <5 x i32> poison, <4 x i32> , !dbg [[DBG33]] +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[BUF_PTR_4_SLICE_0]], ptr addrspace(8) align 32 
[[AUX_PTR_2_PTR_RSRC]], i32 [[AUX_PTR_2_PTR_OFF]], i32 0, i32 0), !dbg [[DBG33]]
 ; CHECK-NEXT:    [[AUX_PTR_2_PTR_PART_4:%.*]] = add nuw i32 [[AUX_PTR_2_PTR_OFF]], 16, !dbg [[DBG33]]
-; CHECK-NEXT:    [[BUF_PTR_4_PTR_INT_SLICE_4:%.*]] = extractelement <5 x i32> [[BUF_PTR_4_PTR_INT_LEGAL]], i64 4, !dbg [[DBG33]]
-; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[BUF_PTR_4_PTR_INT_SLICE_4]], ptr addrspace(8) align 16 [[AUX_PTR_2_PTR_RSRC]], i32 [[AUX_PTR_2_PTR_PART_4]], i32 0, i32 0), !dbg [[DBG33]]
+; CHECK-NEXT:    [[BUF_PTR_4_SLICE_4:%.*]] = extractelement <5 x i32> [[BUF_PTR_4_LEGAL]], i64 4, !dbg [[DBG33]]
+; CHECK-NEXT:    call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[BUF_PTR_4_SLICE_4]], ptr addrspace(8) align 16 [[AUX_PTR_2_PTR_RSRC]], i32 [[AUX_PTR_2_PTR_PART_4]], i32 0, i32 0), !dbg [[DBG33]]
 ; CHECK-NEXT:    ret float [[RET]], !dbg [[DBG34:![0-9]+]]
 ;
   %buf.ptr.var = alloca ptr addrspace(7), align 32, addrspace(5), !dbg !20