diff --git a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp index 75a0c47f7c277..657a406e9f705 100644 --- a/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp +++ b/llvm/lib/Target/AMDGPU/AMDGPULowerBufferFatPointers.cpp @@ -66,28 +66,6 @@ // Atomics operations on `ptr addrspace(7)` values are not suppported, as the // hardware does not include a 160-bit atomic. // -// ## Buffer contents type legalization -// -// The underlying buffer intrinsics only support types up to 128 bits long, -// and don't support complex types. If buffer operations were -// standard pointer operations that could be represented as MIR-level loads, -// this would be handled by the various legalization schemes in instruction -// selection. However, because we have to do the conversion from `load` and -// `store` to intrinsics at LLVM IR level, we must perform that legalization -// ourselves. -// -// This involves a combination of -// - Converting arrays to vectors where possible -// - Otherwise, splitting loads and stores of aggregates into loads/stores of -// each component. -// - Zero-extending things to fill a whole number of bytes -// - Casting values of types that don't neatly correspond to supported machine -// value -// (for example, an i96 or i256) into ones that would work ( -// like <3 x i32> and <8 x i32>, respectively) -// - Splitting values that are too long (such as aforementioned <8 x i32>) into -// multiple operations. -// // ## Type remapping // // We use a `ValueMapper` to mangle uses of [vectors of] buffer fat pointers @@ -108,6 +86,7 @@ // This phase also records intrinsics so that they can be remangled or deleted // later. // +// // ## Splitting pointer structs // // The meat of this pass consists of defining semantics for operations that @@ -239,7 +218,6 @@ #include "llvm/IR/ReplaceConstant.h" #include "llvm/InitializePasses.h" #include "llvm/Pass.h" -#include "llvm/Support/Alignment.h" #include "llvm/Support/AtomicOrdering.h" #include "llvm/Support/Debug.h" #include "llvm/Support/ErrorHandling.h" @@ -573,6 +551,7 @@ bool StoreFatPtrsAsIntsVisitor::visitLoadInst(LoadInst &LI) { auto *NLI = cast(LI.clone()); NLI->mutateType(IntTy); NLI = IRB.Insert(NLI); + copyMetadataForLoad(*NLI, LI); NLI->takeName(&LI); Value *CastBack = intsToFatPtrs(NLI, IntTy, Ty, NLI->getName()); @@ -597,540 +576,6 @@ bool StoreFatPtrsAsIntsVisitor::visitStoreInst(StoreInst &SI) { return true; } -namespace { -/// Convert loads/stores of types that the buffer intrinsics can't handle into -/// one ore more such loads/stores that consist of legal types. -/// -/// Do this by -/// 1. Recursing into structs (and arrays that don't share a memory layout with -/// vectors) since the intrinsics can't handle complex types. -/// 2. Converting arrays of non-aggregate, byte-sized types into their -/// corresponding vectors -/// 3. Bitcasting unsupported types, namely overly-long scalars and byte -/// vectors, into vectors of supported types. -/// 4. Splitting up excessively long reads/writes into multiple operations. -/// -/// Note that this doesn't handle complex data strucures, but, in the future, -/// the aggregate load splitter from SROA could be refactored to allow for that -/// case. -class LegalizeBufferContentTypesVisitor - : public InstVisitor { - friend class InstVisitor; - - IRBuilder<> IRB; - - const DataLayout &DL; - - /// If T is [N x U], where U is a scalar type, return the vector type - /// , otherwise, return T. - Type *scalarArrayTypeAsVector(Type *MaybeArrayType); - Value *arrayToVector(Value *V, Type *TargetType, const Twine &Name); - Value *vectorToArray(Value *V, Type *OrigType, const Twine &Name); - - /// Break up the loads of a struct into the loads of its components - - /// Convert a vector or scalar type that can't be operated on by buffer - /// intrinsics to one that would be legal through bitcasts and/or truncation. - /// Uses the wider of i32, i16, or i8 where possible. - Type *legalNonAggregateFor(Type *T); - Value *makeLegalNonAggregate(Value *V, Type *TargetType, const Twine &Name); - Value *makeIllegalNonAggregate(Value *V, Type *OrigType, const Twine &Name); - - struct VecSlice { - uint64_t Index = 0; - uint64_t Length = 0; - VecSlice() = delete; - }; - /// Return the [index, length] pairs into which `T` needs to be cut to form - /// legal buffer load or store operations. Clears `Slices`. Creates an empty - /// `Slices` for non-vector inputs and creates one slice if no slicing will be - /// needed. - void getVecSlices(Type *T, SmallVectorImpl &Slices); - - Value *extractSlice(Value *Vec, VecSlice S, const Twine &Name); - Value *insertSlice(Value *Whole, Value *Part, VecSlice S, const Twine &Name); - - /// In most cases, return `LegalType`. However, when given an input that would - /// normally be a legal type for the buffer intrinsics to return but that - /// isn't hooked up through SelectionDAG, return a type of the same width that - /// can be used with the relevant intrinsics. Specifically, handle the cases: - /// - <1 x T> => T for all T - /// - <=> i16, i32, 2xi32, 4xi32 (as needed) - /// - where T is under 32 bits and the total size is 96 bits <=> <3 x - /// i32> - Type *intrinsicTypeFor(Type *LegalType); - - bool visitLoadImpl(LoadInst &OrigLI, Type *PartType, - SmallVectorImpl &AggIdxs, uint64_t AggByteOffset, - Value *&Result, const Twine &Name); - /// Return value is (Changed, ModifiedInPlace) - std::pair visitStoreImpl(StoreInst &OrigSI, Type *PartType, - SmallVectorImpl &AggIdxs, - uint64_t AggByteOffset, - const Twine &Name); - - bool visitInstruction(Instruction &I) { return false; } - bool visitLoadInst(LoadInst &LI); - bool visitStoreInst(StoreInst &SI); - -public: - LegalizeBufferContentTypesVisitor(const DataLayout &DL, LLVMContext &Ctx) - : IRB(Ctx), DL(DL) {} - bool processFunction(Function &F); -}; -} // namespace - -Type *LegalizeBufferContentTypesVisitor::scalarArrayTypeAsVector(Type *T) { - ArrayType *AT = dyn_cast(T); - if (!AT) - return T; - Type *ET = AT->getElementType(); - if (!ET->isSingleValueType() || isa(ET)) - report_fatal_error("loading non-scalar arrays from buffer fat pointers " - "should have recursed"); - if (!DL.typeSizeEqualsStoreSize(AT)) - report_fatal_error( - "loading padded arrays from buffer fat pinters should have recursed"); - return FixedVectorType::get(ET, AT->getNumElements()); -} - -Value *LegalizeBufferContentTypesVisitor::arrayToVector(Value *V, - Type *TargetType, - const Twine &Name) { - Value *VectorRes = PoisonValue::get(TargetType); - auto *VT = cast(TargetType); - unsigned EC = VT->getNumElements(); - for (auto I : iota_range(0, EC, /*Inclusive=*/false)) { - Value *Elem = IRB.CreateExtractValue(V, I, Name + ".elem." + Twine(I)); - VectorRes = IRB.CreateInsertElement(VectorRes, Elem, I, - Name + ".as.vec." + Twine(I)); - } - return VectorRes; -} - -Value *LegalizeBufferContentTypesVisitor::vectorToArray(Value *V, - Type *OrigType, - const Twine &Name) { - Value *ArrayRes = PoisonValue::get(OrigType); - ArrayType *AT = cast(OrigType); - unsigned EC = AT->getNumElements(); - for (auto I : iota_range(0, EC, /*Inclusive=*/false)) { - Value *Elem = IRB.CreateExtractElement(V, I, Name + ".elem." + Twine(I)); - ArrayRes = IRB.CreateInsertValue(ArrayRes, Elem, I, - Name + ".as.array." + Twine(I)); - } - return ArrayRes; -} - -Type *LegalizeBufferContentTypesVisitor::legalNonAggregateFor(Type *T) { - TypeSize Size = DL.getTypeStoreSizeInBits(T); - // Implicitly zero-extend to the next byte if needed - if (!DL.typeSizeEqualsStoreSize(T)) - T = IRB.getIntNTy(Size.getFixedValue()); - Type *ElemTy = T->getScalarType(); - if (isa(ElemTy)) { - // Pointers are always big enough, and we'll let scalable vectors through to - // fail in codegen. - return T; - } - unsigned ElemSize = DL.getTypeSizeInBits(ElemTy).getFixedValue(); - if (isPowerOf2_32(ElemSize) && ElemSize >= 16 && ElemSize <= 128) { - // [vectors of] anything that's 16/32/64/128 bits can be cast and split into - // legal buffer operations. - return T; - } - Type *BestVectorElemType = nullptr; - if (Size.isKnownMultipleOf(32)) - BestVectorElemType = IRB.getInt32Ty(); - else if (Size.isKnownMultipleOf(16)) - BestVectorElemType = IRB.getInt16Ty(); - else - BestVectorElemType = IRB.getInt8Ty(); - unsigned NumCastElems = - Size.getFixedValue() / BestVectorElemType->getIntegerBitWidth(); - if (NumCastElems == 1) - return BestVectorElemType; - return FixedVectorType::get(BestVectorElemType, NumCastElems); -} - -Value *LegalizeBufferContentTypesVisitor::makeLegalNonAggregate( - Value *V, Type *TargetType, const Twine &Name) { - Type *SourceType = V->getType(); - TypeSize SourceSize = DL.getTypeSizeInBits(SourceType); - TypeSize TargetSize = DL.getTypeSizeInBits(TargetType); - if (SourceSize != TargetSize) { - Type *ShortScalarTy = IRB.getIntNTy(SourceSize.getFixedValue()); - Type *ByteScalarTy = IRB.getIntNTy(TargetSize.getFixedValue()); - Value *AsScalar = IRB.CreateBitCast(V, ShortScalarTy, Name + ".as.scalar"); - Value *Zext = IRB.CreateZExt(AsScalar, ByteScalarTy, Name + ".zext"); - V = Zext; - SourceType = ByteScalarTy; - } - return IRB.CreateBitCast(V, TargetType, Name + ".legal"); -} - -Value *LegalizeBufferContentTypesVisitor::makeIllegalNonAggregate( - Value *V, Type *OrigType, const Twine &Name) { - Type *LegalType = V->getType(); - TypeSize LegalSize = DL.getTypeSizeInBits(LegalType); - TypeSize OrigSize = DL.getTypeSizeInBits(OrigType); - if (LegalSize != OrigSize) { - Type *ShortScalarTy = IRB.getIntNTy(OrigSize.getFixedValue()); - Type *ByteScalarTy = IRB.getIntNTy(LegalSize.getFixedValue()); - Value *AsScalar = IRB.CreateBitCast(V, ByteScalarTy, Name + ".bytes.cast"); - Value *Trunc = IRB.CreateTrunc(AsScalar, ShortScalarTy, Name + ".trunc"); - return IRB.CreateBitCast(Trunc, OrigType, Name + ".orig"); - } - return IRB.CreateBitCast(V, OrigType, Name + ".real.ty"); -} - -Type *LegalizeBufferContentTypesVisitor::intrinsicTypeFor(Type *LegalType) { - auto *VT = dyn_cast(LegalType); - if (!VT) - return LegalType; - Type *ET = VT->getElementType(); - // Explicitly return the element type of 1-element vectors because the - // underlying intrinsics don't like <1 x T> even though it's a synonym for T. - if (VT->getNumElements() == 1) - return ET; - if (DL.getTypeSizeInBits(LegalType) == 96 && DL.getTypeSizeInBits(ET) < 32) - return FixedVectorType::get(IRB.getInt32Ty(), 3); - if (ET->isIntegerTy(8)) { - switch (VT->getNumElements()) { - default: - return LegalType; // Let it crash later - case 1: - return IRB.getInt8Ty(); - case 2: - return IRB.getInt16Ty(); - case 4: - return IRB.getInt32Ty(); - case 8: - return FixedVectorType::get(IRB.getInt32Ty(), 2); - case 16: - return FixedVectorType::get(IRB.getInt32Ty(), 4); - } - } - return LegalType; -} - -void LegalizeBufferContentTypesVisitor::getVecSlices( - Type *T, SmallVectorImpl &Slices) { - Slices.clear(); - auto *VT = dyn_cast(T); - if (!VT) - return; - - uint64_t ElemBitWidth = - DL.getTypeSizeInBits(VT->getElementType()).getFixedValue(); - - uint64_t ElemsPer4Words = 128 / ElemBitWidth; - uint64_t ElemsPer2Words = ElemsPer4Words / 2; - uint64_t ElemsPerWord = ElemsPer2Words / 2; - uint64_t ElemsPerShort = ElemsPerWord / 2; - uint64_t ElemsPerByte = ElemsPerShort / 2; - // If the elements evenly pack into 32-bit words, we can use 3-word stores, - // such as for <6 x bfloat> or <3 x i32>, but we can't dot his for, for - // example, <3 x i64>, since that's not slicing. - uint64_t ElemsPer3Words = ElemsPerWord * 3; - - uint64_t TotalElems = VT->getNumElements(); - uint64_t Index = 0; - auto TrySlice = [&](unsigned MaybeLen) { - if (MaybeLen > 0 && Index + MaybeLen <= TotalElems) { - VecSlice Slice{/*Index=*/Index, /*Length=*/MaybeLen}; - Slices.push_back(Slice); - Index += MaybeLen; - return true; - } - return false; - }; - while (Index < TotalElems) { - TrySlice(ElemsPer4Words) || TrySlice(ElemsPer3Words) || - TrySlice(ElemsPer2Words) || TrySlice(ElemsPerWord) || - TrySlice(ElemsPerShort) || TrySlice(ElemsPerByte); - } -} - -Value *LegalizeBufferContentTypesVisitor::extractSlice(Value *Vec, VecSlice S, - const Twine &Name) { - auto *VecVT = dyn_cast(Vec->getType()); - if (!VecVT) - return Vec; - if (S.Length == VecVT->getNumElements() && S.Index == 0) - return Vec; - if (S.Length == 1) - return IRB.CreateExtractElement(Vec, S.Index, - Name + ".slice." + Twine(S.Index)); - SmallVector Mask = llvm::to_vector( - llvm::iota_range(S.Index, S.Index + S.Length, /*Inclusive=*/false)); - return IRB.CreateShuffleVector(Vec, Mask, Name + ".slice." + Twine(S.Index)); -} - -Value *LegalizeBufferContentTypesVisitor::insertSlice(Value *Whole, Value *Part, - VecSlice S, - const Twine &Name) { - auto *WholeVT = dyn_cast(Whole->getType()); - if (!WholeVT) - return Part; - if (S.Length == WholeVT->getNumElements() && S.Index == 0) - return Part; - if (S.Length == 1) { - return IRB.CreateInsertElement(Whole, Part, S.Index, - Name + ".slice." + Twine(S.Index)); - } - int NumElems = cast(Whole->getType())->getNumElements(); - - // Extend the slice with poisons to make the main shufflevector happy. - SmallVector ExtPartMask(NumElems, -1); - for (auto [I, E] : llvm::enumerate( - MutableArrayRef(ExtPartMask).take_front(S.Length))) { - E = I; - } - Value *ExtPart = IRB.CreateShuffleVector(Part, ExtPartMask, - Name + ".ext." + Twine(S.Index)); - - SmallVector Mask = - llvm::to_vector(llvm::iota_range(0, NumElems, /*Inclusive=*/false)); - for (auto [I, E] : - llvm::enumerate(MutableArrayRef(Mask).slice(S.Index, S.Length))) - E = I + NumElems; - return IRB.CreateShuffleVector(Whole, ExtPart, Mask, - Name + ".parts." + Twine(S.Index)); -} - -bool LegalizeBufferContentTypesVisitor::visitLoadImpl( - LoadInst &OrigLI, Type *PartType, SmallVectorImpl &AggIdxs, - uint64_t AggByteOff, Value *&Result, const Twine &Name) { - if (auto *ST = dyn_cast(PartType)) { - const StructLayout *Layout = DL.getStructLayout(ST); - bool Changed = false; - for (auto [I, ElemTy, Offset] : - llvm::enumerate(ST->elements(), Layout->getMemberOffsets())) { - AggIdxs.push_back(I); - Changed |= visitLoadImpl(OrigLI, ElemTy, AggIdxs, - AggByteOff + Offset.getFixedValue(), Result, - Name + "." + Twine(I)); - AggIdxs.pop_back(); - } - return Changed; - } - if (auto *AT = dyn_cast(PartType)) { - Type *ElemTy = AT->getElementType(); - if (!ElemTy->isSingleValueType() || !DL.typeSizeEqualsStoreSize(ElemTy) || - ElemTy->isVectorTy()) { - TypeSize ElemStoreSize = DL.getTypeStoreSize(ElemTy); - bool Changed = false; - for (auto I : llvm::iota_range(0, AT->getNumElements(), - /*Inclusive=*/false)) { - AggIdxs.push_back(I); - Changed |= visitLoadImpl(OrigLI, ElemTy, AggIdxs, - AggByteOff + I * ElemStoreSize.getFixedValue(), - Result, Name + Twine(I)); - AggIdxs.pop_back(); - } - return Changed; - } - } - - // Typical case - - Type *ArrayAsVecType = scalarArrayTypeAsVector(PartType); - Type *LegalType = legalNonAggregateFor(ArrayAsVecType); - - SmallVector Slices; - getVecSlices(LegalType, Slices); - bool HasSlices = Slices.size() > 1; - bool IsAggPart = !AggIdxs.empty(); - Value *LoadsRes; - if (!HasSlices && !IsAggPart) { - Type *LoadableType = intrinsicTypeFor(LegalType); - if (LoadableType == PartType) - return false; - - IRB.SetInsertPoint(&OrigLI); - auto *NLI = cast(OrigLI.clone()); - NLI->mutateType(LoadableType); - NLI = IRB.Insert(NLI); - NLI->setName(Name + ".loadable"); - - LoadsRes = IRB.CreateBitCast(NLI, LegalType, Name + ".from.loadable"); - } else { - IRB.SetInsertPoint(&OrigLI); - LoadsRes = PoisonValue::get(LegalType); - Value *OrigPtr = OrigLI.getPointerOperand(); - // If we're needing to spill something into more than one load, its legal - // type will be a vector (ex. an i256 load will have LegalType = <8 x i32>). - // But if we're already a scalar (which can happen if we're splitting up a - // struct), the element type will be the legal type itself. - Type *ElemType = LegalType->getScalarType(); - unsigned ElemBytes = DL.getTypeStoreSize(ElemType); - AAMDNodes AANodes = OrigLI.getAAMetadata(); - if (IsAggPart && Slices.empty()) - Slices.push_back(VecSlice{/*Index=*/0, /*Length=*/1}); - for (VecSlice S : Slices) { - Type *SliceType = - S.Length != 1 ? FixedVectorType::get(ElemType, S.Length) : ElemType; - int64_t ByteOffset = AggByteOff + S.Index * ElemBytes; - // You can't reasonably expect loads to wrap around the edge of memory. - Value *NewPtr = IRB.CreateGEP( - IRB.getInt8Ty(), OrigLI.getPointerOperand(), IRB.getInt32(ByteOffset), - OrigPtr->getName() + ".off.ptr." + Twine(ByteOffset), - GEPNoWrapFlags::noUnsignedWrap()); - Type *LoadableType = intrinsicTypeFor(SliceType); - LoadInst *NewLI = IRB.CreateAlignedLoad( - LoadableType, NewPtr, commonAlignment(OrigLI.getAlign(), ByteOffset), - Name + ".off." + Twine(ByteOffset)); - copyMetadataForLoad(*NewLI, OrigLI); - NewLI->setAAMetadata( - AANodes.adjustForAccess(ByteOffset, LoadableType, DL)); - NewLI->setAtomic(OrigLI.getOrdering(), OrigLI.getSyncScopeID()); - NewLI->setVolatile(OrigLI.isVolatile()); - Value *Loaded = IRB.CreateBitCast(NewLI, SliceType, - NewLI->getName() + ".from.loadable"); - LoadsRes = insertSlice(LoadsRes, Loaded, S, Name); - } - } - if (LegalType != ArrayAsVecType) - LoadsRes = makeIllegalNonAggregate(LoadsRes, ArrayAsVecType, Name); - if (ArrayAsVecType != PartType) - LoadsRes = vectorToArray(LoadsRes, PartType, Name); - - if (IsAggPart) - Result = IRB.CreateInsertValue(Result, LoadsRes, AggIdxs, Name); - else - Result = LoadsRes; - return true; -} - -bool LegalizeBufferContentTypesVisitor::visitLoadInst(LoadInst &LI) { - if (LI.getPointerAddressSpace() != AMDGPUAS::BUFFER_FAT_POINTER) - return false; - - SmallVector AggIdxs; - Type *OrigType = LI.getType(); - Value *Result = PoisonValue::get(OrigType); - bool Changed = visitLoadImpl(LI, OrigType, AggIdxs, 0, Result, LI.getName()); - if (!Changed) - return false; - Result->takeName(&LI); - LI.replaceAllUsesWith(Result); - LI.eraseFromParent(); - return Changed; -} - -std::pair LegalizeBufferContentTypesVisitor::visitStoreImpl( - StoreInst &OrigSI, Type *PartType, SmallVectorImpl &AggIdxs, - uint64_t AggByteOff, const Twine &Name) { - if (auto *ST = dyn_cast(PartType)) { - const StructLayout *Layout = DL.getStructLayout(ST); - bool Changed = false; - for (auto [I, ElemTy, Offset] : - llvm::enumerate(ST->elements(), Layout->getMemberOffsets())) { - AggIdxs.push_back(I); - Changed |= std::get<0>(visitStoreImpl(OrigSI, ElemTy, AggIdxs, - AggByteOff + Offset.getFixedValue(), - Name + "." + Twine(I))); - AggIdxs.pop_back(); - } - return std::make_pair(Changed, /*ModifiedInPlace=*/false); - } - if (auto *AT = dyn_cast(PartType)) { - Type *ElemTy = AT->getElementType(); - if (!ElemTy->isSingleValueType() || !DL.typeSizeEqualsStoreSize(ElemTy) || - ElemTy->isVectorTy()) { - TypeSize ElemStoreSize = DL.getTypeStoreSize(ElemTy); - bool Changed = false; - for (auto I : llvm::iota_range(0, AT->getNumElements(), - /*Inclusive=*/false)) { - AggIdxs.push_back(I); - Changed |= std::get<0>(visitStoreImpl( - OrigSI, ElemTy, AggIdxs, - AggByteOff + I * ElemStoreSize.getFixedValue(), Name + Twine(I))); - AggIdxs.pop_back(); - } - return std::make_pair(Changed, /*ModifiedInPlace=*/false); - } - } - - Value *OrigData = OrigSI.getValueOperand(); - Value *NewData = OrigData; - - bool IsAggPart = !AggIdxs.empty(); - if (IsAggPart) - NewData = IRB.CreateExtractValue(NewData, AggIdxs, Name); - - Type *ArrayAsVecType = scalarArrayTypeAsVector(PartType); - if (ArrayAsVecType != PartType) { - NewData = arrayToVector(NewData, ArrayAsVecType, Name); - } - - Type *LegalType = legalNonAggregateFor(ArrayAsVecType); - if (LegalType != ArrayAsVecType) { - NewData = makeLegalNonAggregate(NewData, LegalType, Name); - } - - SmallVector Slices; - getVecSlices(LegalType, Slices); - bool NeedToSplit = Slices.size() > 1 || IsAggPart; - if (!NeedToSplit) { - Type *StorableType = intrinsicTypeFor(LegalType); - if (StorableType == PartType) - return std::make_pair(/*Changed=*/false, /*ModifiedInPlace=*/false); - NewData = IRB.CreateBitCast(NewData, StorableType, Name + ".storable"); - OrigSI.setOperand(0, NewData); - return std::make_pair(/*Changed=*/true, /*ModifiedInPlace=*/true); - } - - Value *OrigPtr = OrigSI.getPointerOperand(); - Type *ElemType = LegalType->getScalarType(); - if (IsAggPart && Slices.empty()) - Slices.push_back(VecSlice{/*Index=*/0, /*Length=*/1}); - unsigned ElemBytes = DL.getTypeStoreSize(ElemType); - AAMDNodes AANodes = OrigSI.getAAMetadata(); - for (VecSlice S : Slices) { - Type *SliceType = - S.Length != 1 ? FixedVectorType::get(ElemType, S.Length) : ElemType; - int64_t ByteOffset = AggByteOff + S.Index * ElemBytes; - Value *NewPtr = - IRB.CreateGEP(IRB.getInt8Ty(), OrigPtr, IRB.getInt32(ByteOffset), - OrigPtr->getName() + ".part." + Twine(S.Index), - GEPNoWrapFlags::noUnsignedWrap()); - Value *DataSlice = extractSlice(NewData, S, Name); - Type *StorableType = intrinsicTypeFor(SliceType); - DataSlice = IRB.CreateBitCast(DataSlice, StorableType, - DataSlice->getName() + ".storable"); - auto *NewSI = cast(OrigSI.clone()); - NewSI->setAlignment(commonAlignment(OrigSI.getAlign(), ByteOffset)); - IRB.Insert(NewSI); - NewSI->setOperand(0, DataSlice); - NewSI->setOperand(1, NewPtr); - NewSI->setAAMetadata(AANodes.adjustForAccess(ByteOffset, StorableType, DL)); - } - return std::make_pair(/*Changed=*/true, /*ModifiedInPlace=*/false); -} - -bool LegalizeBufferContentTypesVisitor::visitStoreInst(StoreInst &SI) { - if (SI.getPointerAddressSpace() != AMDGPUAS::BUFFER_FAT_POINTER) - return false; - IRB.SetInsertPoint(&SI); - SmallVector AggIdxs; - Value *OrigData = SI.getValueOperand(); - auto [Changed, ModifiedInPlace] = - visitStoreImpl(SI, OrigData->getType(), AggIdxs, 0, OrigData->getName()); - if (Changed && !ModifiedInPlace) - SI.eraseFromParent(); - return Changed; -} - -bool LegalizeBufferContentTypesVisitor::processFunction(Function &F) { - bool Changed = false; - for (Instruction &I : make_early_inc_range(instructions(F))) { - Changed |= visit(I); - } - return Changed; -} - /// Return the ptr addrspace(8) and i32 (resource and offset parts) in a lowered /// buffer fat pointer constant. static std::pair @@ -2321,16 +1766,12 @@ bool AMDGPULowerBufferFatPointers::run(Module &M, const TargetMachine &TM) { } StoreFatPtrsAsIntsVisitor MemOpsRewrite(&IntTM, M.getContext()); - LegalizeBufferContentTypesVisitor BufferContentsTypeRewrite(DL, - M.getContext()); for (Function &F : M.functions()) { bool InterfaceChange = hasFatPointerInterface(F, &StructTM); bool BodyChanges = containsBufferFatPointers(F, &StructTM); Changed |= MemOpsRewrite.processFunction(F); - if (InterfaceChange || BodyChanges) { + if (InterfaceChange || BodyChanges) NeedsRemap.push_back(std::make_pair(&F, InterfaceChange)); - Changed |= BufferContentsTypeRewrite.processFunction(F); - } } if (NeedsRemap.empty()) return Changed; diff --git a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll b/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll deleted file mode 100644 index 4c7a4ba3a44a5..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/buffer-fat-pointers-contents-legalization.ll +++ /dev/null @@ -1,3998 +0,0 @@ -; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py UTC_ARGS: --version 5 -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 < %s | FileCheck -check-prefix=SDAG %s -; RUN: llc -mtriple=amdgcn -mcpu=gfx900 -global-isel=1 < %s | FileCheck -check-prefix=GISEL %s - -; Note: if you're adding tests here, also add them to -; lower-buffer-fat-pointers-contents-legalization.ll to verify the IR produced by -; the lowering. - -;;; Legal types. These are natively supported, no casts should be performed. - -define i8 @load_i8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load i8, ptr addrspace(7) %p - ret i8 %ret -} - -define void @store_i8(i8 %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_byte v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_byte v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store i8 %data, ptr addrspace(7) %p - ret void -} - -define i16 @load_i16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load i16, ptr addrspace(7) %p - ret i16 %ret -} - -define void @store_i16(i16 %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_short v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_short v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store i16 %data, ptr addrspace(7) %p - ret void -} - -define i32 @load_i32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load i32, ptr addrspace(7) %p - ret i32 %ret -} - -define void @store_i32(i32 %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store i32 %data, ptr addrspace(7) %p - ret void -} - -define i64 @load_i64(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_i64: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_i64: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load i64, ptr addrspace(7) %p - ret i64 %ret -} - -define void @store_i64(i64 %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_i64: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_i64: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store i64 %data, ptr addrspace(7) %p - ret void -} - -define i128 @load_i128(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_i128: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_i128: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load i128, ptr addrspace(7) %p - ret i128 %ret -} - -define void @store_i128(i128 %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_i128: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_i128: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store i128 %data, ptr addrspace(7) %p - ret void -} - -define <1 x i32> @load_v1i32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v1i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v1i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <1 x i32>, ptr addrspace(7) %p - ret <1 x i32> %ret -} - -define void @store_v1i32(<1 x i32> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v1i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v1i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <1 x i32> %data, ptr addrspace(7) %p - ret void -} - -define <2 x i32> @load_v2i32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v2i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v2i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <2 x i32>, ptr addrspace(7) %p - ret <2 x i32> %ret -} - -define void @store_v2i32(<2 x i32> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v2i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v2i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <2 x i32> %data, ptr addrspace(7) %p - ret void -} - -define <3 x i32> @load_v3i32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v3i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v3i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <3 x i32>, ptr addrspace(7) %p - ret <3 x i32> %ret -} - -define void @store_v3i32(<3 x i32> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v3i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v3i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <3 x i32> %data, ptr addrspace(7) %p - ret void -} - -define <4 x i32> @load_v4i32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v4i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v4i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <4 x i32>, ptr addrspace(7) %p - ret <4 x i32> %ret -} - -define void @store_v4i32(<4 x i32> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v4i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v4i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <4 x i32> %data, ptr addrspace(7) %p - ret void -} - -define <2 x i16> @load_v2i16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v2i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v2i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <2 x i16>, ptr addrspace(7) %p - ret <2 x i16> %ret -} - -define void @store_v2i16(<2 x i16> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v2i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v2i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <2 x i16> %data, ptr addrspace(7) %p - ret void -} - -define <4 x i16> @load_v4i16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v4i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v4i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <4 x i16>, ptr addrspace(7) %p - ret <4 x i16> %ret -} - -define void @store_v4i16(<4 x i16> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v4i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v4i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <4 x i16> %data, ptr addrspace(7) %p - ret void -} - -define <8 x i16> @load_v8i16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v8i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v8i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <8 x i16>, ptr addrspace(7) %p - ret <8 x i16> %ret -} - -define void @store_v8i16(<8 x i16> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v8i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v8i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <8 x i16> %data, ptr addrspace(7) %p - ret void -} - -define <2 x i64> @load_v2i64(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v2i64: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v2i64: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <2 x i64>, ptr addrspace(7) %p - ret <2 x i64> %ret -} - -define void @store_v2i64(<2 x i64> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v2i64: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v2i64: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <2 x i64> %data, ptr addrspace(7) %p - ret void -} - -define half @load_f16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_f16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_f16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load half, ptr addrspace(7) %p - ret half %ret -} - -define void @store_f16(half %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_f16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_short v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_f16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_short v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store half %data, ptr addrspace(7) %p - ret void -} - -define bfloat @load_bf16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_bf16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_bf16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load bfloat, ptr addrspace(7) %p - ret bfloat %ret -} - -define void @store_bf16(bfloat %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_bf16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_short v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_bf16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_short v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store bfloat %data, ptr addrspace(7) %p - ret void -} - -define <2 x half> @load_v2f16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v2f16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v2f16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <2 x half>, ptr addrspace(7) %p - ret <2 x half> %ret -} - -define void @store_v2f16(<2 x half> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v2f16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v2f16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <2 x half> %data, ptr addrspace(7) %p - ret void -} - -define <4 x bfloat> @load_v4bf16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v4bf16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v4bf16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <4 x bfloat>, ptr addrspace(7) %p - ret <4 x bfloat> %ret -} - -define void @store_v4bf16(<4 x bfloat> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v4bf16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v4bf16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v3, 16, v1 -; GISEL-NEXT: v_mov_b32_sdwa v0, v2 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 -; GISEL-NEXT: v_mov_b32_sdwa v1, v3 dst_sel:WORD_1 dst_unused:UNUSED_PRESERVE src0_sel:WORD_0 -; GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <4 x bfloat> %data, ptr addrspace(7) %p - ret void -} - -define <8 x half> @load_v8f16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v8f16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v8f16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <8 x half>, ptr addrspace(7) %p - ret <8 x half> %ret -} - -define void @store_v8f16(<8 x half> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v8f16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v8f16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <8 x half> %data, ptr addrspace(7) %p - ret void -} - -define float @load_f32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load float, ptr addrspace(7) %p - ret float %ret -} - -define void @store_f32(float %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store float %data, ptr addrspace(7) %p - ret void -} - -define <2 x float> @load_v2f32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v2f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v2f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <2 x float>, ptr addrspace(7) %p - ret <2 x float> %ret -} - -define void @store_v2f32(<2 x float> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v2f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v2f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <2 x float> %data, ptr addrspace(7) %p - ret void -} - -define <3 x float> @load_v3f32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v3f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v3f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <3 x float>, ptr addrspace(7) %p - ret <3 x float> %ret -} - -define void @store_v3f32(<3 x float> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v3f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v3f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <3 x float> %data, ptr addrspace(7) %p - ret void -} - -define <4 x float> @load_v4f32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v4f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v4f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <4 x float>, ptr addrspace(7) %p - ret <4 x float> %ret -} - -define void @store_v4f32(<4 x float> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v4f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v4f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <4 x float> %data, ptr addrspace(7) %p - ret void -} - -define ptr addrspace(0) @load_p0(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_p0: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_p0: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load ptr addrspace(0), ptr addrspace(7) %p - ret ptr addrspace(0) %ret -} - -define void @store_p0(ptr addrspace(0) %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_p0: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_p0: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store ptr addrspace(0) %data, ptr addrspace(7) %p - ret void -} - -define ptr addrspace(1) @load_p1(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_p1: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_p1: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load ptr addrspace(1), ptr addrspace(7) %p - ret ptr addrspace(1) %ret -} - -define void @store_p1(ptr addrspace(1) %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_p1: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_p1: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store ptr addrspace(1) %data, ptr addrspace(7) %p - ret void -} - -define ptr addrspace(2) @load_p2(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_p2: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_p2: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load ptr addrspace(2), ptr addrspace(7) %p - ret ptr addrspace(2) %ret -} - -define void @store_p2(ptr addrspace(2) %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_p2: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_p2: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store ptr addrspace(2) %data, ptr addrspace(7) %p - ret void -} - -define ptr addrspace(3) @load_p3(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_p3: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_p3: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load ptr addrspace(3), ptr addrspace(7) %p - ret ptr addrspace(3) %ret -} - -define void @store_p3(ptr addrspace(3) %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_p3: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_p3: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store ptr addrspace(3) %data, ptr addrspace(7) %p - ret void -} - -define ptr addrspace(4) @load_p4(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_p4: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_p4: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load ptr addrspace(4), ptr addrspace(7) %p - ret ptr addrspace(4) %ret -} - -define void @store_p4(ptr addrspace(4) %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_p4: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_p4: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store ptr addrspace(4) %data, ptr addrspace(7) %p - ret void -} - -define ptr addrspace(5) @load_p5(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_p5: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_p5: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load ptr addrspace(5), ptr addrspace(7) %p - ret ptr addrspace(5) %ret -} - -define void @store_p5(ptr addrspace(5) %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_p5: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_p5: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store ptr addrspace(5) %data, ptr addrspace(7) %p - ret void -} - -define ptr addrspace(6) @load_p6(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_p6: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_p6: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load ptr addrspace(6), ptr addrspace(7) %p - ret ptr addrspace(6) %ret -} - -define void @store_p6(ptr addrspace(6) %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_p6: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_p6: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store ptr addrspace(6) %data, ptr addrspace(7) %p - ret void -} - -define ptr addrspace(8) @load_p8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_p8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_p8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load ptr addrspace(8), ptr addrspace(7) %p - ret ptr addrspace(8) %ret -} - -define void @store_p8(ptr addrspace(8) %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_p8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_p8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store ptr addrspace(8) %data, ptr addrspace(7) %p - ret void -} - -define <2 x ptr addrspace(1)> @load_v2p1(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v2p1: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v2p1: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <2 x ptr addrspace(1)>, ptr addrspace(7) %p - ret <2 x ptr addrspace(1)> %ret -} - -define void @store_v2p1(<2 x ptr addrspace(1)> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v2p1: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v2p1: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <2 x ptr addrspace(1)> %data, ptr addrspace(7) %p - ret void -} - -define <2 x ptr addrspace(5)> @load_v2p5(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v2p5: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v2p5: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <2 x ptr addrspace(5)>, ptr addrspace(7) %p - ret <2 x ptr addrspace(5)> %ret -} - -define void @store_v2p5(<2 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v2p5: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v2p5: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <2 x ptr addrspace(5)> %data, ptr addrspace(7) %p - ret void -} - -define <3 x ptr addrspace(5)> @load_v3p5(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v3p5: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v3p5: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <3 x ptr addrspace(5)>, ptr addrspace(7) %p - ret <3 x ptr addrspace(5)> %ret -} - -define void @store_v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v3p5: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v3p5: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <3 x ptr addrspace(5)> %data, ptr addrspace(7) %p - ret void -} - -define <4 x ptr addrspace(5)> @load_v4p5(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v4p5: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v4p5: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <4 x ptr addrspace(5)>, ptr addrspace(7) %p - ret <4 x ptr addrspace(5)> %ret -} - -define void @store_v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v4p5: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v4p5: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <4 x ptr addrspace(5)> %data, ptr addrspace(7) %p - ret void -} - -;;; 3 words in a short type. These need to be bitcast to <3 x i32> to be supported. - -define <6 x half> @load_v6f16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v6f16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v6f16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <6 x half>, ptr addrspace(7) %p - ret <6 x half> %ret -} - -define void @store_v6f16(<6 x half> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v6f16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v6f16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <6 x half> %data, ptr addrspace(7) %p - ret void -} - -;;; Long types (32 bit elements). Must be split into multiple operations. - -define <5 x float> @load_v5f32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v5f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v5f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <5 x float>, ptr addrspace(7) %p - ret <5 x float> %ret -} - -define void @store_v5f32(<5 x float> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v5f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v5f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <5 x float> %data, ptr addrspace(7) %p - ret void -} - -define <6 x float> @load_v6f32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v6f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_load_dwordx2 v[4:5], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v6f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_load_dwordx2 v[4:5], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <6 x float>, ptr addrspace(7) %p - ret <6 x float> %ret -} - -define void @store_v6f32(<6 x float> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v6f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_store_dwordx2 v[4:5], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v6f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_store_dwordx2 v[4:5], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <6 x float> %data, ptr addrspace(7) %p - ret void -} - -define <7 x float> @load_v7f32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v7f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_load_dwordx3 v[4:6], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v7f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_load_dwordx3 v[4:6], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <7 x float>, ptr addrspace(7) %p - ret <7 x float> %ret -} - -define void @store_v7f32(<7 x float> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v7f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_store_dwordx3 v[4:6], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v7f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_store_dwordx3 v[4:6], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <7 x float> %data, ptr addrspace(7) %p - ret void -} - -define <8 x float> @load_v8f32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v8f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v8f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <8 x float>, ptr addrspace(7) %p - ret <8 x float> %ret -} - -define void @store_v8f32(<8 x float> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v8f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v8f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <8 x float> %data, ptr addrspace(7) %p - ret void -} - -define <10 x float> @load_v10f32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v10f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; SDAG-NEXT: buffer_load_dwordx2 v[8:9], off, s[16:19], 0 offset:32 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v10f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; GISEL-NEXT: buffer_load_dwordx2 v[8:9], off, s[16:19], 0 offset:32 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <10 x float>, ptr addrspace(7) %p - ret <10 x float> %ret -} - -define void @store_v10f32(<10 x float> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v10f32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; SDAG-NEXT: buffer_store_dwordx2 v[8:9], off, s[16:19], 0 offset:32 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v10f32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; GISEL-NEXT: buffer_store_dwordx2 v[8:9], off, s[16:19], 0 offset:32 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <10 x float> %data, ptr addrspace(7) %p - ret void -} - -define <6 x i32> @load_v6i32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v6i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_load_dwordx2 v[4:5], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v6i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_load_dwordx2 v[4:5], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <6 x i32>, ptr addrspace(7) %p - ret <6 x i32> %ret -} - -define void @store_v6i32(<6 x i32> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v6i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_store_dwordx2 v[4:5], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v6i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_store_dwordx2 v[4:5], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <6 x i32> %data, ptr addrspace(7) %p - ret void -} - -define <4 x ptr addrspace(1)> @load_v4p1(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v4p1: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v4p1: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <4 x ptr addrspace(1)>, ptr addrspace(7) %p - ret <4 x ptr addrspace(1)> %ret -} - -define void @store_v4p1(<4 x ptr addrspace(1)> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v4p1: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v4p1: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <4 x ptr addrspace(1)> %data, ptr addrspace(7) %p - ret void -} - -;;; Uneven types with 16-bit elements. Require splitting into multiple operations. - -define <1 x i16> @load_v1i16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v1i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v1i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <1 x i16>, ptr addrspace(7) %p - ret <1 x i16> %ret -} - -define void @store_v1i16(<1 x i16> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v1i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_short v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v1i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_short v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <1 x i16> %data, ptr addrspace(7) %p - ret void -} - -define <3 x i16> @load_v3i16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v3i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: buffer_load_ushort v1, off, s[16:19], 0 offset:4 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v3i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: buffer_load_ushort v1, off, s[16:19], 0 offset:4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <3 x i16>, ptr addrspace(7) %p - ret <3 x i16> %ret -} - -define void @store_v3i16(<3 x i16> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v3i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: buffer_store_short v1, off, s[16:19], 0 offset:4 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v3i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: buffer_store_short v1, off, s[16:19], 0 offset:4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <3 x i16> %data, ptr addrspace(7) %p - ret void -} - -define <5 x i16> @load_v5i16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v5i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: buffer_load_ushort v2, off, s[16:19], 0 offset:8 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v5i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: buffer_load_ushort v2, off, s[16:19], 0 offset:8 -; GISEL-NEXT: s_mov_b32 s4, 0xffff -; GISEL-NEXT: s_waitcnt vmcnt(1) -; GISEL-NEXT: v_bfi_b32 v0, s4, v0, v0 -; GISEL-NEXT: v_bfi_b32 v1, s4, v1, v1 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <5 x i16>, ptr addrspace(7) %p - ret <5 x i16> %ret -} - -define void @store_v5i16(<5 x i16> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v5i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: buffer_store_short v2, off, s[16:19], 0 offset:8 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v5i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: buffer_store_short v2, off, s[16:19], 0 offset:8 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <5 x i16> %data, ptr addrspace(7) %p - ret void -} - -define <6 x i16> @load_v6i16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v6i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v6i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <6 x i16>, ptr addrspace(7) %p - ret <6 x i16> %ret -} - -define void @store_v6i16(<6 x i16> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v6i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v6i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <6 x i16> %data, ptr addrspace(7) %p - ret void -} - -define <7 x i16> @load_v7i16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v7i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: buffer_load_ushort v3, off, s[16:19], 0 offset:12 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v7i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: buffer_load_ushort v3, off, s[16:19], 0 offset:12 -; GISEL-NEXT: s_mov_b32 s4, 0xffff -; GISEL-NEXT: s_waitcnt vmcnt(1) -; GISEL-NEXT: v_bfi_b32 v0, s4, v0, v0 -; GISEL-NEXT: v_bfi_b32 v1, s4, v1, v1 -; GISEL-NEXT: v_bfi_b32 v2, s4, v2, v2 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <7 x i16>, ptr addrspace(7) %p - ret <7 x i16> %ret -} - -define void @store_v7i16(<7 x i16> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v7i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: buffer_store_short v3, off, s[16:19], 0 offset:12 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v7i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: buffer_store_short v3, off, s[16:19], 0 offset:12 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <7 x i16> %data, ptr addrspace(7) %p - ret void -} - -define <9 x i16> @load_v9i16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v9i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v9i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_mov_b32 s4, 0xffff -; GISEL-NEXT: s_waitcnt vmcnt(1) -; GISEL-NEXT: v_bfi_b32 v0, s4, v0, v0 -; GISEL-NEXT: v_bfi_b32 v1, s4, v1, v1 -; GISEL-NEXT: v_bfi_b32 v2, s4, v2, v2 -; GISEL-NEXT: v_bfi_b32 v3, s4, v3, v3 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <9 x i16>, ptr addrspace(7) %p - ret <9 x i16> %ret -} - -define void @store_v9i16(<9 x i16> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v9i16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_store_short v4, off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v9i16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_store_short v4, off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <9 x i16> %data, ptr addrspace(7) %p - ret void -} - -;;; Byte vectors. Need to be -;;; - Split into multiple operations -;;; - Bitcast if they have a natively supported width - -define <1 x i8> @load_v1i8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v1i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v1i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <1 x i8>, ptr addrspace(7) %p - ret <1 x i8> %ret -} - -define void @store_v1i8(<1 x i8> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v1i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_byte v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v1i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_byte v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <1 x i8> %data, ptr addrspace(7) %p - ret void -} - -define <2 x i8> @load_v2i8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v2i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v2i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <2 x i8>, ptr addrspace(7) %p - ret <2 x i8> %ret -} - -define void @store_v2i8(<2 x i8> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v2i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: buffer_store_short v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v2i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GISEL-NEXT: buffer_store_short v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <2 x i8> %data, ptr addrspace(7) %p - ret void -} - -define <3 x i8> @load_v3i8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v3i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; SDAG-NEXT: buffer_load_ubyte v2, off, s[16:19], 0 offset:2 -; SDAG-NEXT: s_waitcnt vmcnt(1) -; SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v3i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; GISEL-NEXT: buffer_load_ubyte v2, off, s[16:19], 0 offset:2 -; GISEL-NEXT: s_waitcnt vmcnt(1) -; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <3 x i8>, ptr addrspace(7) %p - ret <3 x i8> %ret -} - -define void @store_v3i8(<3 x i8> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v3i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: buffer_store_short v0, off, s[16:19], 0 -; SDAG-NEXT: buffer_store_byte v2, off, s[16:19], 0 offset:2 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v3i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GISEL-NEXT: buffer_store_short v0, off, s[16:19], 0 -; GISEL-NEXT: buffer_store_byte v2, off, s[16:19], 0 offset:2 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <3 x i8> %data, ptr addrspace(7) %p - ret void -} - -define <4 x i8> @load_v4i8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v4i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v4i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <4 x i8>, ptr addrspace(7) %p - ret <4 x i8> %ret -} - -define void @store_v4i8(<4 x i8> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v4i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v3 -; SDAG-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v4i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_mov_b32_e32 v5, 8 -; GISEL-NEXT: v_mov_b32_e32 v4, 0xff -; GISEL-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_or_b32 v0, v0, v4, v1 -; GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GISEL-NEXT: v_or3_b32 v0, v0, v1, v2 -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <4 x i8> %data, ptr addrspace(7) %p - ret void -} - -define <5 x i8> @load_v5i8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v5i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: buffer_load_ubyte v4, off, s[16:19], 0 offset:4 -; SDAG-NEXT: s_waitcnt vmcnt(1) -; SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v5i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: buffer_load_ubyte v4, off, s[16:19], 0 offset:4 -; GISEL-NEXT: s_waitcnt vmcnt(1) -; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <5 x i8>, ptr addrspace(7) %p - ret <5 x i8> %ret -} - -define void @store_v5i8(<5 x i8> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v5i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v3 -; SDAG-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: buffer_store_byte v4, off, s[16:19], 0 offset:4 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v5i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_mov_b32_e32 v6, 8 -; GISEL-NEXT: v_mov_b32_e32 v5, 0xff -; GISEL-NEXT: v_lshlrev_b32_sdwa v1, v6, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_or_b32 v0, v0, v5, v1 -; GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GISEL-NEXT: v_or3_b32 v0, v0, v1, v2 -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: buffer_store_byte v4, off, s[16:19], 0 offset:4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <5 x i8> %data, ptr addrspace(7) %p - ret void -} - -define <6 x i8> @load_v6i8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v6i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ushort v6, off, s[16:19], 0 offset:4 -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(1) -; SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v6 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: v_lshrrev_b32_e32 v7, 8, v0 -; SDAG-NEXT: v_lshrrev_b64 v[3:4], 24, v[0:1] -; SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; SDAG-NEXT: v_mov_b32_e32 v4, v6 -; SDAG-NEXT: v_mov_b32_e32 v1, v7 -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v6i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4 -; GISEL-NEXT: s_waitcnt vmcnt(1) -; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <6 x i8>, ptr addrspace(7) %p - ret <6 x i8> %ret -} - -define void @store_v6i8(<6 x i8> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v6i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v3 -; SDAG-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v5, 8, v5 -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: buffer_store_short v4, off, s[16:19], 0 offset:4 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v6i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GISEL-NEXT: v_and_b32_e32 v1, 0xff, v3 -; GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GISEL-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v5 -; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GISEL-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GISEL-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: buffer_store_short v2, off, s[16:19], 0 offset:4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <6 x i8> %data, ptr addrspace(7) %p - ret void -} - -define <7 x i8> @load_v7i8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v7i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4 -; SDAG-NEXT: buffer_load_ubyte v6, off, s[16:19], 0 offset:6 -; SDAG-NEXT: s_waitcnt vmcnt(2) -; SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; SDAG-NEXT: s_waitcnt vmcnt(1) -; SDAG-NEXT: v_lshrrev_b32_e32 v5, 8, v4 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v7i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4 -; GISEL-NEXT: buffer_load_ubyte v6, off, s[16:19], 0 offset:6 -; GISEL-NEXT: s_waitcnt vmcnt(2) -; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GISEL-NEXT: s_waitcnt vmcnt(1) -; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <7 x i8>, ptr addrspace(7) %p - ret <7 x i8> %ret -} - -define void @store_v7i8(<7 x i8> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v7i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v3 -; SDAG-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: v_lshlrev_b16_e32 v0, 8, v5 -; SDAG-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: buffer_store_short v0, off, s[16:19], 0 offset:4 -; SDAG-NEXT: buffer_store_byte v6, off, s[16:19], 0 offset:6 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v7i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_mov_b32_e32 v8, 8 -; GISEL-NEXT: v_mov_b32_e32 v7, 0xff -; GISEL-NEXT: v_lshlrev_b32_sdwa v1, v8, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_or_b32 v0, v0, v7, v1 -; GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GISEL-NEXT: v_or3_b32 v0, v0, v1, v2 -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: v_and_b32_e32 v0, 0xff, v5 -; GISEL-NEXT: v_lshlrev_b16_e32 v0, 8, v0 -; GISEL-NEXT: v_or_b32_sdwa v0, v4, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GISEL-NEXT: buffer_store_short v0, off, s[16:19], 0 offset:4 -; GISEL-NEXT: buffer_store_byte v6, off, s[16:19], 0 offset:6 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <7 x i8> %data, ptr addrspace(7) %p - ret void -} - -define <8 x i8> @load_v8i8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v8i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: v_lshrrev_b64 v[3:4], 24, v[0:1] -; SDAG-NEXT: v_lshrrev_b32_e32 v8, 8, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; SDAG-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; SDAG-NEXT: v_mov_b32_e32 v4, v1 -; SDAG-NEXT: v_mov_b32_e32 v1, v8 -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v8i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v8, 8, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GISEL-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GISEL-NEXT: v_mov_b32_e32 v4, v1 -; GISEL-NEXT: v_mov_b32_e32 v1, v8 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <8 x i8>, ptr addrspace(7) %p - ret <8 x i8> %ret -} - -define void @store_v8i8(<8 x i8> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v8i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_lshlrev_b16_e32 v5, 8, v5 -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; SDAG-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v5, 8, v7 -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v3 -; SDAG-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v3, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: buffer_store_dwordx2 v[3:4], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v8i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_mov_b32_e32 v9, 8 -; GISEL-NEXT: v_mov_b32_e32 v8, 0xff -; GISEL-NEXT: v_lshlrev_b32_sdwa v1, v9, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_or_b32 v0, v0, v8, v1 -; GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GISEL-NEXT: v_or3_b32 v0, v0, v1, v2 -; GISEL-NEXT: v_lshlrev_b32_sdwa v1, v9, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v6 -; GISEL-NEXT: v_and_b32_e32 v3, 0xff, v7 -; GISEL-NEXT: v_and_or_b32 v1, v4, v8, v1 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GISEL-NEXT: v_or3_b32 v1, v1, v2, v3 -; GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <8 x i8> %data, ptr addrspace(7) %p - ret void -} - -define <12 x i8> @load_v12i8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v12i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: v_mov_b32_e32 v8, v2 -; SDAG-NEXT: v_lshrrev_b32_e32 v9, 8, v2 -; SDAG-NEXT: v_lshrrev_b64 v[3:4], 24, v[0:1] -; SDAG-NEXT: v_lshrrev_b32_e32 v14, 8, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v13, 16, v0 -; SDAG-NEXT: v_lshrrev_b64 v[11:12], 24, v[8:9] -; SDAG-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; SDAG-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; SDAG-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; SDAG-NEXT: v_mov_b32_e32 v4, v1 -; SDAG-NEXT: v_mov_b32_e32 v1, v14 -; SDAG-NEXT: v_mov_b32_e32 v2, v13 -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v12i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v13, 8, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v12, 16, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GISEL-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v2 -; GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v2 -; GISEL-NEXT: v_mov_b32_e32 v4, v1 -; GISEL-NEXT: v_mov_b32_e32 v8, v2 -; GISEL-NEXT: v_mov_b32_e32 v1, v13 -; GISEL-NEXT: v_mov_b32_e32 v2, v12 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <12 x i8>, ptr addrspace(7) %p - ret <12 x i8> %ret -} - -define void @store_v12i8(<12 x i8> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v12i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_lshlrev_b16_e32 v9, 8, v9 -; SDAG-NEXT: v_lshlrev_b16_e32 v5, 8, v5 -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; SDAG-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v9, 8, v11 -; SDAG-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v5, 8, v7 -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v3 -; SDAG-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v7, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v6, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: buffer_store_dwordx3 v[6:8], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v12i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_mov_b32_e32 v13, 8 -; GISEL-NEXT: v_mov_b32_e32 v12, 0xff -; GISEL-NEXT: v_lshlrev_b32_sdwa v1, v13, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_or_b32 v0, v0, v12, v1 -; GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GISEL-NEXT: v_or3_b32 v0, v0, v1, v2 -; GISEL-NEXT: v_lshlrev_b32_sdwa v1, v13, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v6 -; GISEL-NEXT: v_and_b32_e32 v3, 0xff, v7 -; GISEL-NEXT: v_and_or_b32 v1, v4, v12, v1 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GISEL-NEXT: v_or3_b32 v1, v1, v2, v3 -; GISEL-NEXT: v_lshlrev_b32_sdwa v2, v13, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_b32_e32 v3, 0xff, v10 -; GISEL-NEXT: v_and_b32_e32 v4, 0xff, v11 -; GISEL-NEXT: v_and_or_b32 v2, v8, v12, v2 -; GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GISEL-NEXT: v_or3_b32 v2, v2, v3, v4 -; GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <12 x i8> %data, ptr addrspace(7) %p - ret void -} - -define <16 x i8> @load_v16i8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v16i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: v_lshrrev_b64 v[18:19], 24, v[0:1] -; SDAG-NEXT: v_lshrrev_b64 v[11:12], 24, v[2:3] -; SDAG-NEXT: v_lshrrev_b32_e32 v17, 8, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v16, 16, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; SDAG-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; SDAG-NEXT: v_lshrrev_b32_e32 v9, 8, v2 -; SDAG-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; SDAG-NEXT: v_lshrrev_b32_e32 v13, 8, v3 -; SDAG-NEXT: v_lshrrev_b32_e32 v14, 16, v3 -; SDAG-NEXT: v_lshrrev_b32_e32 v15, 24, v3 -; SDAG-NEXT: v_mov_b32_e32 v4, v1 -; SDAG-NEXT: v_mov_b32_e32 v8, v2 -; SDAG-NEXT: v_mov_b32_e32 v12, v3 -; SDAG-NEXT: v_mov_b32_e32 v1, v17 -; SDAG-NEXT: v_mov_b32_e32 v2, v16 -; SDAG-NEXT: v_mov_b32_e32 v3, v18 -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v16i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v16, 8, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v17, 16, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v18, 24, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GISEL-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v2 -; GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v2 -; GISEL-NEXT: v_lshrrev_b32_e32 v13, 8, v3 -; GISEL-NEXT: v_lshrrev_b32_e32 v14, 16, v3 -; GISEL-NEXT: v_lshrrev_b32_e32 v15, 24, v3 -; GISEL-NEXT: v_mov_b32_e32 v4, v1 -; GISEL-NEXT: v_mov_b32_e32 v8, v2 -; GISEL-NEXT: v_mov_b32_e32 v12, v3 -; GISEL-NEXT: v_mov_b32_e32 v1, v16 -; GISEL-NEXT: v_mov_b32_e32 v2, v17 -; GISEL-NEXT: v_mov_b32_e32 v3, v18 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <16 x i8>, ptr addrspace(7) %p - ret <16 x i8> %ret -} - -define void @store_v16i8(<16 x i8> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v16i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_lshlrev_b16_e32 v13, 8, v13 -; SDAG-NEXT: v_lshlrev_b16_e32 v9, 8, v9 -; SDAG-NEXT: v_lshlrev_b16_e32 v5, 8, v5 -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; SDAG-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v13, 8, v15 -; SDAG-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v9, 8, v11 -; SDAG-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v5, 8, v7 -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v3 -; SDAG-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v11, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v10, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v9, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: buffer_store_dwordx4 v[9:12], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v16i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_mov_b32_e32 v17, 8 -; GISEL-NEXT: v_mov_b32_e32 v16, 0xff -; GISEL-NEXT: v_lshlrev_b32_sdwa v1, v17, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_or_b32 v0, v0, v16, v1 -; GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GISEL-NEXT: v_or3_b32 v0, v0, v1, v2 -; GISEL-NEXT: v_lshlrev_b32_sdwa v1, v17, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v6 -; GISEL-NEXT: v_and_b32_e32 v3, 0xff, v7 -; GISEL-NEXT: v_and_or_b32 v1, v4, v16, v1 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GISEL-NEXT: v_or3_b32 v1, v1, v2, v3 -; GISEL-NEXT: v_lshlrev_b32_sdwa v2, v17, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_b32_e32 v3, 0xff, v10 -; GISEL-NEXT: v_and_b32_e32 v4, 0xff, v11 -; GISEL-NEXT: v_and_or_b32 v2, v8, v16, v2 -; GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GISEL-NEXT: v_or3_b32 v2, v2, v3, v4 -; GISEL-NEXT: v_lshlrev_b32_sdwa v3, v17, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_b32_e32 v4, 0xff, v14 -; GISEL-NEXT: v_and_b32_e32 v5, 0xff, v15 -; GISEL-NEXT: v_and_or_b32 v3, v12, v16, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GISEL-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GISEL-NEXT: v_or3_b32 v3, v3, v4, v5 -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <16 x i8> %data, ptr addrspace(7) %p - ret void -} - -define <32 x i8> @load_v32i8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v32i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[33:36], off, s[16:19], 0 -; SDAG-NEXT: buffer_load_dwordx4 v[48:51], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(1) -; SDAG-NEXT: v_lshrrev_b64 v[3:4], 24, v[33:34] -; SDAG-NEXT: v_lshrrev_b64 v[11:12], 24, v[35:36] -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: v_lshrrev_b64 v[19:20], 24, v[48:49] -; SDAG-NEXT: v_lshrrev_b64 v[27:28], 24, v[50:51] -; SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v33 -; SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v33 -; SDAG-NEXT: v_lshrrev_b32_e32 v5, 8, v34 -; SDAG-NEXT: v_lshrrev_b32_e32 v6, 16, v34 -; SDAG-NEXT: v_lshrrev_b32_e32 v7, 24, v34 -; SDAG-NEXT: v_lshrrev_b32_e32 v9, 8, v35 -; SDAG-NEXT: v_lshrrev_b32_e32 v10, 16, v35 -; SDAG-NEXT: v_lshrrev_b32_e32 v13, 8, v36 -; SDAG-NEXT: v_lshrrev_b32_e32 v14, 16, v36 -; SDAG-NEXT: v_lshrrev_b32_e32 v15, 24, v36 -; SDAG-NEXT: v_lshrrev_b32_e32 v17, 8, v48 -; SDAG-NEXT: v_lshrrev_b32_e32 v18, 16, v48 -; SDAG-NEXT: v_lshrrev_b32_e32 v21, 8, v49 -; SDAG-NEXT: v_lshrrev_b32_e32 v22, 16, v49 -; SDAG-NEXT: v_lshrrev_b32_e32 v23, 24, v49 -; SDAG-NEXT: v_lshrrev_b32_e32 v25, 8, v50 -; SDAG-NEXT: v_lshrrev_b32_e32 v26, 16, v50 -; SDAG-NEXT: v_lshrrev_b32_e32 v29, 8, v51 -; SDAG-NEXT: v_lshrrev_b32_e32 v30, 16, v51 -; SDAG-NEXT: v_lshrrev_b32_e32 v31, 24, v51 -; SDAG-NEXT: v_mov_b32_e32 v0, v33 -; SDAG-NEXT: v_mov_b32_e32 v4, v34 -; SDAG-NEXT: v_mov_b32_e32 v8, v35 -; SDAG-NEXT: v_mov_b32_e32 v12, v36 -; SDAG-NEXT: v_mov_b32_e32 v16, v48 -; SDAG-NEXT: v_mov_b32_e32 v20, v49 -; SDAG-NEXT: v_mov_b32_e32 v24, v50 -; SDAG-NEXT: v_mov_b32_e32 v28, v51 -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v32i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_load_dwordx4 v[16:19], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(1) -; GISEL-NEXT: v_lshrrev_b32_e32 v35, 8, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v36, 16, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v37, 24, v0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v32, 8, v16 -; GISEL-NEXT: v_lshrrev_b32_e32 v33, 16, v16 -; GISEL-NEXT: v_lshrrev_b32_e32 v34, 24, v16 -; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; GISEL-NEXT: v_lshrrev_b32_e32 v6, 16, v1 -; GISEL-NEXT: v_lshrrev_b32_e32 v7, 24, v1 -; GISEL-NEXT: v_lshrrev_b32_e32 v9, 8, v2 -; GISEL-NEXT: v_lshrrev_b32_e32 v10, 16, v2 -; GISEL-NEXT: v_lshrrev_b32_e32 v11, 24, v2 -; GISEL-NEXT: v_lshrrev_b32_e32 v13, 8, v3 -; GISEL-NEXT: v_lshrrev_b32_e32 v14, 16, v3 -; GISEL-NEXT: v_lshrrev_b32_e32 v15, 24, v3 -; GISEL-NEXT: v_lshrrev_b32_e32 v21, 8, v17 -; GISEL-NEXT: v_lshrrev_b32_e32 v22, 16, v17 -; GISEL-NEXT: v_lshrrev_b32_e32 v23, 24, v17 -; GISEL-NEXT: v_lshrrev_b32_e32 v25, 8, v18 -; GISEL-NEXT: v_lshrrev_b32_e32 v26, 16, v18 -; GISEL-NEXT: v_lshrrev_b32_e32 v27, 24, v18 -; GISEL-NEXT: v_lshrrev_b32_e32 v29, 8, v19 -; GISEL-NEXT: v_lshrrev_b32_e32 v30, 16, v19 -; GISEL-NEXT: v_lshrrev_b32_e32 v31, 24, v19 -; GISEL-NEXT: v_mov_b32_e32 v4, v1 -; GISEL-NEXT: v_mov_b32_e32 v8, v2 -; GISEL-NEXT: v_mov_b32_e32 v12, v3 -; GISEL-NEXT: v_mov_b32_e32 v20, v17 -; GISEL-NEXT: v_mov_b32_e32 v24, v18 -; GISEL-NEXT: v_mov_b32_e32 v28, v19 -; GISEL-NEXT: v_mov_b32_e32 v1, v35 -; GISEL-NEXT: v_mov_b32_e32 v2, v36 -; GISEL-NEXT: v_mov_b32_e32 v3, v37 -; GISEL-NEXT: v_mov_b32_e32 v17, v32 -; GISEL-NEXT: v_mov_b32_e32 v18, v33 -; GISEL-NEXT: v_mov_b32_e32 v19, v34 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <32 x i8>, ptr addrspace(7) %p - ret <32 x i8> %ret -} - -define void @store_v32i8(<32 x i8> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v32i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_lshlrev_b16_e32 v13, 8, v13 -; SDAG-NEXT: v_or_b32_sdwa v12, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v13, 8, v15 -; SDAG-NEXT: v_or_b32_sdwa v13, v14, v13 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: buffer_load_ubyte v14, off, s[0:3], s32 -; SDAG-NEXT: v_lshlrev_b16_e32 v5, 8, v5 -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; SDAG-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v5, 8, v7 -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v3 -; SDAG-NEXT: v_lshlrev_b16_e32 v9, 8, v9 -; SDAG-NEXT: v_or_b32_sdwa v5, v6, v5 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v6, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v29 -; SDAG-NEXT: v_or_b32_sdwa v8, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v9, 8, v11 -; SDAG-NEXT: v_or_b32_sdwa v7, v28, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v25 -; SDAG-NEXT: v_or_b32_sdwa v9, v10, v9 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v10, v24, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v27 -; SDAG-NEXT: v_or_b32_sdwa v11, v26, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v21 -; SDAG-NEXT: v_lshlrev_b16_e32 v2, 8, v23 -; SDAG-NEXT: v_lshlrev_b16_e32 v3, 8, v17 -; SDAG-NEXT: v_lshlrev_b16_e32 v15, 8, v19 -; SDAG-NEXT: v_or_b32_sdwa v17, v20, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v19, v22, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v16, v16, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v3, v12, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v2, v8, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v1, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v6 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: v_or_b32_sdwa v15, v18, v15 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v5, v10, v11 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v4, v17, v19 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v3, v16, v15 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: s_waitcnt vmcnt(1) -; SDAG-NEXT: v_lshlrev_b16_e32 v0, 8, v14 -; SDAG-NEXT: v_or_b32_sdwa v0, v30, v0 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v6, v7, v0 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: buffer_store_dwordx4 v[3:6], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v32i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_mov_b32_e32 v31, 8 -; GISEL-NEXT: v_lshlrev_b32_sdwa v1, v31, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_mov_b32_e32 v32, 0xff -; GISEL-NEXT: v_and_or_b32 v0, v0, v32, v1 -; GISEL-NEXT: v_lshlrev_b32_sdwa v1, v31, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_b32_e32 v5, 0xff, v7 -; GISEL-NEXT: buffer_load_ubyte v7, off, s[0:3], s32 -; GISEL-NEXT: v_and_or_b32 v1, v4, v32, v1 -; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v2 -; GISEL-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GISEL-NEXT: v_and_b32_e32 v4, 0xff, v6 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 16, v2 -; GISEL-NEXT: v_lshlrev_b32_e32 v3, 24, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GISEL-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GISEL-NEXT: v_or3_b32 v0, v0, v2, v3 -; GISEL-NEXT: v_or3_b32 v1, v1, v4, v5 -; GISEL-NEXT: v_lshlrev_b32_sdwa v2, v31, v9 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_b32_e32 v3, 0xff, v10 -; GISEL-NEXT: v_and_b32_e32 v4, 0xff, v11 -; GISEL-NEXT: v_and_or_b32 v2, v8, v32, v2 -; GISEL-NEXT: v_lshlrev_b32_e32 v3, 16, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v4, 24, v4 -; GISEL-NEXT: v_or3_b32 v2, v2, v3, v4 -; GISEL-NEXT: v_lshlrev_b32_sdwa v3, v31, v13 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_b32_e32 v4, 0xff, v14 -; GISEL-NEXT: v_and_b32_e32 v5, 0xff, v15 -; GISEL-NEXT: v_and_or_b32 v3, v12, v32, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v4, 16, v4 -; GISEL-NEXT: v_lshlrev_b32_e32 v5, 24, v5 -; GISEL-NEXT: v_or3_b32 v3, v3, v4, v5 -; GISEL-NEXT: v_lshlrev_b32_sdwa v4, v31, v17 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_b32_e32 v5, 0xff, v18 -; GISEL-NEXT: v_and_b32_e32 v6, 0xff, v19 -; GISEL-NEXT: v_and_or_b32 v4, v16, v32, v4 -; GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; GISEL-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GISEL-NEXT: v_lshlrev_b32_sdwa v8, v31, v21 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_or3_b32 v4, v4, v5, v6 -; GISEL-NEXT: v_and_b32_e32 v5, 0xff, v22 -; GISEL-NEXT: v_and_b32_e32 v6, 0xff, v23 -; GISEL-NEXT: v_and_or_b32 v8, v20, v32, v8 -; GISEL-NEXT: v_lshlrev_b32_e32 v5, 16, v5 -; GISEL-NEXT: v_lshlrev_b32_e32 v6, 24, v6 -; GISEL-NEXT: v_or3_b32 v5, v8, v5, v6 -; GISEL-NEXT: v_lshlrev_b32_sdwa v6, v31, v25 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_b32_e32 v8, 0xff, v26 -; GISEL-NEXT: v_and_b32_e32 v9, 0xff, v27 -; GISEL-NEXT: v_and_or_b32 v6, v24, v32, v6 -; GISEL-NEXT: v_lshlrev_b32_e32 v8, 16, v8 -; GISEL-NEXT: v_lshlrev_b32_e32 v9, 24, v9 -; GISEL-NEXT: v_or3_b32 v6, v6, v8, v9 -; GISEL-NEXT: v_lshlrev_b32_sdwa v8, v31, v29 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_b32_e32 v9, 0xff, v30 -; GISEL-NEXT: v_and_or_b32 v8, v28, v32, v8 -; GISEL-NEXT: v_lshlrev_b32_e32 v9, 16, v9 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshlrev_b32_e32 v7, 24, v7 -; GISEL-NEXT: v_or3_b32 v7, v8, v9, v7 -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <32 x i8> %data, ptr addrspace(7) %p - ret void -} - -;;; Arrays. Need to become vectors. - -define [1 x i32] @load_a1i32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_a1i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_a1i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load [1 x i32], ptr addrspace(7) %p - ret [1 x i32] %ret -} - -define void @store_a1i32([1 x i32] %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_a1i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_a1i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store [1 x i32] %data, ptr addrspace(7) %p - ret void -} - -define [2 x i32] @load_a2i32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_a2i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_a2i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load [2 x i32], ptr addrspace(7) %p - ret [2 x i32] %ret -} - -define void @store_a2i32([2 x i32] %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_a2i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_a2i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store [2 x i32] %data, ptr addrspace(7) %p - ret void -} - -define [2 x half] @load_a2f16(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_a2f16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_a2f16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v1, 16, v0 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load [2 x half], ptr addrspace(7) %p - ret [2 x half] %ret -} - -define void @store_a2f16([2 x half] %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_a2f16: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_mov_b32 s4, 0x5040100 -; SDAG-NEXT: v_perm_b32 v0, v1, v0, s4 -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_a2f16: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store [2 x half] %data, ptr addrspace(7) %p - ret void -} - -define [2 x ptr addrspace(1)] @load_a2p1(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_a2p1: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_a2p1: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load [2 x ptr addrspace(1)], ptr addrspace(7) %p - ret [2 x ptr addrspace(1)] %ret -} - -define void @store_a2p1([2 x ptr addrspace(1)] %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_a2p1: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_a2p1: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store [2 x ptr addrspace(1)] %data, ptr addrspace(7) %p - ret void -} - -;;; Scalars of atypical width. Need to be cast to vectors and split. - -define i40 @load_i40(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_i40: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: buffer_load_ubyte v1, off, s[16:19], 0 offset:4 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_i40: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: buffer_load_ubyte v1, off, s[16:19], 0 offset:4 -; GISEL-NEXT: v_mov_b32_e32 v2, 0xff -; GISEL-NEXT: s_waitcnt vmcnt(1) -; GISEL-NEXT: v_lshrrev_b32_e32 v3, 8, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v4, 24, v0 -; GISEL-NEXT: v_and_b32_e32 v3, 0xff, v3 -; GISEL-NEXT: v_and_b32_sdwa v2, v0, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_1 src1_sel:DWORD -; GISEL-NEXT: v_lshlrev_b16_e32 v4, 8, v4 -; GISEL-NEXT: v_lshlrev_b16_e32 v3, 8, v3 -; GISEL-NEXT: v_or_b32_e32 v2, v2, v4 -; GISEL-NEXT: v_or_b32_sdwa v0, v0, v3 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GISEL-NEXT: v_and_b32_e32 v2, 0xffff, v2 -; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GISEL-NEXT: v_lshl_or_b32 v0, v2, 16, v0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load i40, ptr addrspace(7) %p - ret i40 %ret -} - -define void @store_i40(i40 %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_i40: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: buffer_store_byte v1, off, s[16:19], 0 offset:4 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_i40: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: buffer_store_byte v1, off, s[16:19], 0 offset:4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store i40 %data, ptr addrspace(7) %p - ret void -} - -define i96 @load_i96(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_i96: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_i96: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load i96, ptr addrspace(7) %p - ret i96 %ret -} - -define void @store_i96(i96 %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_i96: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_i96: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store i96 %data, ptr addrspace(7) %p - ret void -} - -define i160 @load_i160(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_i160: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_mov_b32 s4, s33 -; SDAG-NEXT: s_add_i32 s33, s32, 0x7c0 -; SDAG-NEXT: s_and_b32 s33, s33, 0xfffff800 -; SDAG-NEXT: s_addk_i32 s32, 0x1800 -; SDAG-NEXT: s_addk_i32 s32, 0xe800 -; SDAG-NEXT: s_mov_b32 s33, s4 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_i160: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_load_dword v4, off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load i160, ptr addrspace(7) %p - ret i160 %ret -} - -define void @store_i160(i160 %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_i160: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: s_mov_b32 s4, s33 -; SDAG-NEXT: s_add_i32 s33, s32, 0x7c0 -; SDAG-NEXT: s_and_b32 s33, s33, 0xfffff800 -; SDAG-NEXT: s_addk_i32 s32, 0x1000 -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_addk_i32 s32, 0xf000 -; SDAG-NEXT: s_mov_b32 s33, s4 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_i160: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_store_dword v4, off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store i160 %data, ptr addrspace(7) %p - ret void -} - -define i256 @load_i256(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_i256: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_i256: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_load_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load i256, ptr addrspace(7) %p - ret i256 %ret -} - -define void @store_i256(i256 %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_i256: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_i256: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_store_dwordx4 v[4:7], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store i256 %data, ptr addrspace(7) %p - ret void -} - -;;; Non-byte-sized scalars. Require zero-extension. - -define i7 @load_i7(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_i7: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_i7: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load i7, ptr addrspace(7) %p - ret i7 %ret -} - -define void @store_i7(i7 %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_i7: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_and_b32_e32 v0, 0x7f, v0 -; SDAG-NEXT: buffer_store_byte v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_i7: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0 -; GISEL-NEXT: buffer_store_byte v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store i7 %data, ptr addrspace(7) %p - ret void -} - -define i4 @load_i4(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_i4: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_i4: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load i4, ptr addrspace(7) %p - ret i4 %ret -} - -define void @store_i4(i4 %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_i4: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_and_b32_e32 v0, 15, v0 -; SDAG-NEXT: buffer_store_byte v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_i4: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_and_b32_e32 v0, 15, v0 -; GISEL-NEXT: buffer_store_byte v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store i4 %data, ptr addrspace(7) %p - ret void -} - - -;;; Byte-sized vectors of i4. Require casts. - -define <2 x i4> @load_v2i4(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v2i4: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; SDAG-NEXT: buffer_load_ubyte v1, off, s[0:3], s32 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: v_and_b32_e32 v0, 15, v1 -; SDAG-NEXT: v_lshrrev_b16_e32 v1, 4, v1 -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v2i4: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v1, 4, v0 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <2 x i4>, ptr addrspace(7) %p - ret <2 x i4> %ret -} - -define void @store_v2i4(<2 x i4> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v2i4: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 4, v1 -; SDAG-NEXT: v_and_b32_e32 v0, 15, v0 -; SDAG-NEXT: v_or_b32_e32 v0, v0, v1 -; SDAG-NEXT: buffer_store_byte v0, off, s[0:3], s32 -; SDAG-NEXT: buffer_load_ubyte v0, off, s[0:3], s32 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: buffer_store_byte v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v2i4: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_and_b32_e32 v1, 15, v1 -; GISEL-NEXT: v_and_b32_e32 v0, 15, v0 -; GISEL-NEXT: v_lshlrev_b16_e32 v1, 4, v1 -; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 -; GISEL-NEXT: buffer_store_byte v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <2 x i4> %data, ptr addrspace(7) %p - ret void -} - -define <4 x i4> @load_v4i4(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v4i4: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; SDAG-NEXT: v_mov_b32_e32 v2, 15 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: buffer_store_short v0, off, s[0:3], s32 -; SDAG-NEXT: buffer_load_ushort v1, off, s[0:3], s32 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: v_lshrrev_b16_e32 v4, 4, v1 -; SDAG-NEXT: v_and_b32_e32 v0, 15, v1 -; SDAG-NEXT: v_lshrrev_b16_e32 v3, 12, v1 -; SDAG-NEXT: v_and_b32_sdwa v2, v1, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_1 src1_sel:DWORD -; SDAG-NEXT: v_and_b32_e32 v1, 15, v4 -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v4i4: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v1, 4, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v3, 12, v0 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <4 x i4>, ptr addrspace(7) %p - ret <4 x i4> %ret -} - -define void @store_v4i4(<4 x i4> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v4i4: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_and_b32_e32 v1, 15, v1 -; SDAG-NEXT: v_and_b32_e32 v0, 15, v0 -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 4, v1 -; SDAG-NEXT: v_or_b32_e32 v0, v0, v1 -; SDAG-NEXT: v_mov_b32_e32 v1, 15 -; SDAG-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; SDAG-NEXT: v_or_b32_e32 v0, v0, v1 -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 12, v3 -; SDAG-NEXT: v_or_b32_e32 v0, v0, v1 -; SDAG-NEXT: buffer_store_short v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v4i4: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_and_b32_e32 v1, 15, v1 -; GISEL-NEXT: v_and_b32_e32 v0, 15, v0 -; GISEL-NEXT: v_lshlrev_b16_e32 v1, 4, v1 -; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 -; GISEL-NEXT: v_mov_b32_e32 v1, 15 -; GISEL-NEXT: v_and_b32_sdwa v1, v2, v1 dst_sel:BYTE_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 -; GISEL-NEXT: v_and_b32_e32 v1, 15, v3 -; GISEL-NEXT: v_lshlrev_b16_e32 v1, 12, v1 -; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 -; GISEL-NEXT: buffer_store_short v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <4 x i4> %data, ptr addrspace(7) %p - ret void -} - -define <8 x i4> @load_v8i4(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v8i4: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v7, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: v_and_b32_e32 v0, 15, v7 -; SDAG-NEXT: v_bfe_u32 v1, v7, 4, 4 -; SDAG-NEXT: v_bfe_u32 v2, v7, 8, 4 -; SDAG-NEXT: v_bfe_u32 v3, v7, 12, 4 -; SDAG-NEXT: v_bfe_u32 v4, v7, 16, 4 -; SDAG-NEXT: v_bfe_u32 v5, v7, 20, 4 -; SDAG-NEXT: v_bfe_u32 v6, v7, 24, 4 -; SDAG-NEXT: v_lshrrev_b32_e32 v7, 28, v7 -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v8i4: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v1, 4, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v2, 8, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v3, 12, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v4, 16, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v5, 20, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v6, 24, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v7, 28, v0 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <8 x i4>, ptr addrspace(7) %p - ret <8 x i4> %ret -} - -define void @store_v8i4(<8 x i4> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v8i4: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_and_b32_e32 v1, 15, v1 -; SDAG-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; SDAG-NEXT: v_and_or_b32 v0, v0, 15, v1 -; SDAG-NEXT: v_and_b32_e32 v1, 15, v3 -; SDAG-NEXT: v_and_b32_e32 v2, 15, v2 -; SDAG-NEXT: v_lshlrev_b32_e32 v1, 12, v1 -; SDAG-NEXT: v_lshlrev_b32_e32 v2, 8, v2 -; SDAG-NEXT: v_or3_b32 v0, v0, v2, v1 -; SDAG-NEXT: v_and_b32_e32 v1, 15, v5 -; SDAG-NEXT: v_mov_b32_e32 v2, 15 -; SDAG-NEXT: v_lshlrev_b32_e32 v1, 20, v1 -; SDAG-NEXT: v_and_b32_sdwa v3, v4, v2 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; SDAG-NEXT: v_or3_b32 v0, v0, v3, v1 -; SDAG-NEXT: v_lshlrev_b32_e32 v1, 28, v7 -; SDAG-NEXT: v_and_b32_sdwa v2, v6, v2 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; SDAG-NEXT: v_or3_b32 v0, v0, v2, v1 -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v8i4: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_and_b32_e32 v1, 15, v1 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 4, v1 -; GISEL-NEXT: v_and_or_b32 v0, v0, 15, v1 -; GISEL-NEXT: v_and_b32_e32 v1, 15, v2 -; GISEL-NEXT: v_and_b32_e32 v2, 15, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 8, v1 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 12, v2 -; GISEL-NEXT: v_or3_b32 v0, v0, v1, v2 -; GISEL-NEXT: v_mov_b32_e32 v1, 15 -; GISEL-NEXT: v_and_b32_e32 v3, 15, v5 -; GISEL-NEXT: v_and_b32_sdwa v2, v4, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GISEL-NEXT: v_lshlrev_b32_e32 v3, 20, v3 -; GISEL-NEXT: v_or3_b32 v0, v0, v2, v3 -; GISEL-NEXT: v_and_b32_e32 v2, 15, v7 -; GISEL-NEXT: v_and_b32_sdwa v1, v6, v1 dst_sel:BYTE_3 dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:DWORD -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 28, v2 -; GISEL-NEXT: v_or3_b32 v0, v0, v1, v2 -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <8 x i4> %data, ptr addrspace(7) %p - ret void -} - -;;; Vectors of non-byte-sized integers. - -define <2 x i6> @load_v2i6(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v2i6: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ushort v1, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: v_and_b32_e32 v0, 63, v1 -; SDAG-NEXT: v_bfe_u32 v1, v1, 6, 6 -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v2i6: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ushort v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b16_e32 v1, 6, v0 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <2 x i6>, ptr addrspace(7) %p - ret <2 x i6> %ret -} - -define void @store_v2i6(<2 x i6> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v2i6: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 6, v1 -; SDAG-NEXT: v_and_b32_e32 v0, 63, v0 -; SDAG-NEXT: v_or_b32_e32 v0, v0, v1 -; SDAG-NEXT: v_and_b32_e32 v0, 0xfff, v0 -; SDAG-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; SDAG-NEXT: buffer_store_short v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v2i6: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_and_b32_e32 v1, 63, v1 -; GISEL-NEXT: v_and_b32_e32 v0, 63, v0 -; GISEL-NEXT: v_lshlrev_b16_e32 v1, 6, v1 -; GISEL-NEXT: v_or_b32_e32 v0, v0, v1 -; GISEL-NEXT: v_and_b32_e32 v0, 0xfff, v0 -; GISEL-NEXT: buffer_store_short v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <2 x i6> %data, ptr addrspace(7) %p - ret void -} - -;; Blocks of fp6 elements -define <6 x i32> @load_v32i6(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_v32i6: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_load_dwordx2 v[4:5], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_v32i6: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_load_dwordx2 v[4:5], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load <32 x i6>, ptr addrspace(7) %p - %ret.cast = bitcast <32 x i6> %ret to <6 x i32> - ret <6 x i32> %ret.cast -} - -define void @store_v32i6(<6 x i32> %data.abi, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_v32i6: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: buffer_store_dwordx2 v[4:5], off, s[16:19], 0 offset:16 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_v32i6: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: buffer_store_dwordx2 v[4:5], off, s[16:19], 0 offset:16 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %data = bitcast <6 x i32> %data.abi to <32 x i6> - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store <32 x i6> %data, ptr addrspace(7) %p - ret void -} - -;;; Modifiers - -define <4 x i8> @volatile_load_v4i8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: volatile_load_v4i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 glc -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: volatile_load_v4i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 glc -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load volatile <4 x i8>, ptr addrspace(7) %p - ret <4 x i8> %ret -} - -define void @volatile_store_v4i8(<4 x i8> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: volatile_store_v4i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v3 -; SDAG-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: volatile_store_v4i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_mov_b32_e32 v5, 8 -; GISEL-NEXT: v_mov_b32_e32 v4, 0xff -; GISEL-NEXT: v_lshlrev_b32_sdwa v1, v5, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:DWORD src1_sel:BYTE_0 -; GISEL-NEXT: v_and_or_b32 v0, v0, v4, v1 -; GISEL-NEXT: v_and_b32_e32 v1, 0xff, v2 -; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v3 -; GISEL-NEXT: v_lshlrev_b32_e32 v1, 16, v1 -; GISEL-NEXT: v_lshlrev_b32_e32 v2, 24, v2 -; GISEL-NEXT: v_or3_b32 v0, v0, v1, v2 -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store volatile <4 x i8> %data, ptr addrspace(7) %p - ret void -} - -define <6 x i8> @volatile_load_v6i8(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: volatile_load_v6i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 glc -; SDAG-NEXT: buffer_load_ushort v6, off, s[16:19], 0 offset:4 glc -; SDAG-NEXT: s_waitcnt vmcnt(1) -; SDAG-NEXT: v_lshrrev_b32_e32 v7, 8, v0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: v_and_b32_e32 v1, 0xffff, v6 -; SDAG-NEXT: v_lshrrev_b64 v[3:4], 24, v[0:1] -; SDAG-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; SDAG-NEXT: v_lshrrev_b32_e32 v5, 8, v1 -; SDAG-NEXT: v_mov_b32_e32 v4, v6 -; SDAG-NEXT: v_mov_b32_e32 v1, v7 -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: volatile_load_v6i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 glc -; GISEL-NEXT: buffer_load_ushort v4, off, s[16:19], 0 offset:4 glc -; GISEL-NEXT: s_waitcnt vmcnt(1) -; GISEL-NEXT: v_lshrrev_b32_e32 v2, 16, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v1, 8, v0 -; GISEL-NEXT: v_lshrrev_b32_e32 v3, 24, v0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: v_lshrrev_b32_e32 v5, 8, v4 -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load volatile <6 x i8>, ptr addrspace(7) %p - ret <6 x i8> %ret -} - -define void @volatile_store_v6i8(<6 x i8> %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: volatile_store_v6i8: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v1, 8, v3 -; SDAG-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:WORD_1 dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: v_lshlrev_b16_e32 v5, 8, v5 -; SDAG-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD -; SDAG-NEXT: v_or_b32_sdwa v4, v4, v5 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: buffer_store_short v4, off, s[16:19], 0 offset:4 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: volatile_store_v6i8: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_and_b32_e32 v1, 0xff, v1 -; GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GISEL-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GISEL-NEXT: v_and_b32_e32 v1, 0xff, v3 -; GISEL-NEXT: v_lshlrev_b16_e32 v1, 8, v1 -; GISEL-NEXT: v_or_b32_sdwa v1, v2, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GISEL-NEXT: v_and_b32_e32 v2, 0xff, v5 -; GISEL-NEXT: v_and_b32_e32 v0, 0xffff, v0 -; GISEL-NEXT: v_lshlrev_b16_e32 v2, 8, v2 -; GISEL-NEXT: v_lshl_or_b32 v0, v1, 16, v0 -; GISEL-NEXT: v_or_b32_sdwa v2, v4, v2 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:BYTE_0 src1_sel:DWORD -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: buffer_store_short v2, off, s[16:19], 0 offset:4 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store volatile <6 x i8> %data, ptr addrspace(7) %p - ret void -} - -define [2 x [2 x i32]] @load_a2a2i32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_a2a2i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_a2a2i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load [2 x [2 x i32]], ptr addrspace(7) %p - ret [2 x [2 x i32]] %ret -} - -define void @store_a2a2i32([2 x [2 x i32]] %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_a2a2i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_a2a2i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store [2 x [2 x i32]] %data, ptr addrspace(7) %p - ret void -} - -define [2 x <2 x i32>] @load_a2v2i32(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_a2v2i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_a2v2i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load [2 x <2 x i32>], ptr addrspace(7) %p - ret [2 x <2 x i32>] %ret -} - -define void @store_a2v2i32([2 x <2 x i32>] %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_a2v2i32: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_a2v2i32: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx4 v[0:3], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store [2 x <2 x i32>] %data, ptr addrspace(7) %p - ret void -} - -define { i32 } @load_sl_i32s(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_sl_i32s: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_sl_i32s: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load { i32 }, ptr addrspace(7) %p - ret { i32 } %ret -} - -define void @store_sl_i32s({ i32 } %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_sl_i32s: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_sl_i32s: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store { i32 } %data, ptr addrspace(7) %p - ret void -} - -define { { float } } @load_sl_sl_f32ss(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_sl_sl_f32ss: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_sl_sl_f32ss: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load { { float } }, ptr addrspace(7) %p - ret { { float } } %ret -} - -define void @store_sl_sl_f32ss({ { float } } %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_sl_sl_f32ss: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_sl_sl_f32ss: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dword v0, off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store { { float } } %data, ptr addrspace(7) %p - ret void -} - -define { <2 x i32> } @load_sl_v2i32s(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_sl_v2i32s: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_sl_v2i32s: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load { <2 x i32> }, ptr addrspace(7) %p - ret { <2 x i32> } %ret -} - -define void @store_sl_v2i32s({ <2 x i32> } %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_sl_v2i32s: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_sl_v2i32s: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx2 v[0:1], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store { <2 x i32> } %data, ptr addrspace(7) %p - ret void -} - -define { i64, i32 } @load_sl_i64i32s(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_sl_i64i32s: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_sl_i64i32s: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load { i64, i32 }, ptr addrspace(7) %p - ret { i64, i32 } %ret -} - -define void @store_sl_i64i32s({ i64, i32 } %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_sl_i64i32s: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_sl_i64i32s: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_store_dwordx3 v[0:2], off, s[16:19], 0 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store { i64, i32 } %data, ptr addrspace(7) %p - ret void -} - -define [4 x i7] @load_a4i7(ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: load_a4i7: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 -; SDAG-NEXT: buffer_load_ubyte v1, off, s[16:19], 0 offset:1 -; SDAG-NEXT: buffer_load_ubyte v2, off, s[16:19], 0 offset:2 -; SDAG-NEXT: buffer_load_ubyte v3, off, s[16:19], 0 offset:3 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: load_a4i7: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: buffer_load_ubyte v0, off, s[16:19], 0 -; GISEL-NEXT: buffer_load_ubyte v1, off, s[16:19], 0 offset:1 -; GISEL-NEXT: buffer_load_ubyte v2, off, s[16:19], 0 offset:2 -; GISEL-NEXT: buffer_load_ubyte v3, off, s[16:19], 0 offset:3 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load [4 x i7], ptr addrspace(7) %p - ret [4 x i7] %ret -} - -define void @store_a4i7([4 x i7] %data, ptr addrspace(8) inreg %buf) { -; SDAG-LABEL: store_a4i7: -; SDAG: ; %bb.0: -; SDAG-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; SDAG-NEXT: v_and_b32_e32 v0, 0x7f, v0 -; SDAG-NEXT: buffer_store_byte v0, off, s[16:19], 0 -; SDAG-NEXT: v_and_b32_e32 v0, 0x7f, v1 -; SDAG-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:1 -; SDAG-NEXT: v_and_b32_e32 v0, 0x7f, v2 -; SDAG-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:2 -; SDAG-NEXT: v_and_b32_e32 v0, 0x7f, v3 -; SDAG-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:3 -; SDAG-NEXT: s_waitcnt vmcnt(0) -; SDAG-NEXT: s_setpc_b64 s[30:31] -; -; GISEL-LABEL: store_a4i7: -; GISEL: ; %bb.0: -; GISEL-NEXT: s_waitcnt vmcnt(0) expcnt(0) lgkmcnt(0) -; GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v0 -; GISEL-NEXT: buffer_store_byte v0, off, s[16:19], 0 -; GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v1 -; GISEL-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:1 -; GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v2 -; GISEL-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:2 -; GISEL-NEXT: v_and_b32_e32 v0, 0x7f, v3 -; GISEL-NEXT: buffer_store_byte v0, off, s[16:19], 0 offset:3 -; GISEL-NEXT: s_waitcnt vmcnt(0) -; GISEL-NEXT: s_setpc_b64 s[30:31] - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store [4 x i7] %data, ptr addrspace(7) %p - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.nxv2i32.fail.ll b/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.nxv2i32.fail.ll deleted file mode 100644 index a91d38a58a1e9..0000000000000 --- a/llvm/test/CodeGen/AMDGPU/llvm.amdgcn.raw.ptr.buffer.store.nxv2i32.fail.ll +++ /dev/null @@ -1,11 +0,0 @@ -; Note: The exact error messages aren't important here, but are included to catch -; anything changing. -; RUN: not --crash llc -global-isel=0 -mtriple=amdgcn -mcpu=gfx900 -filetype=null < %s 2>&1 | FileCheck %s --check-prefix=SDAG -; SDAG: LLVM ERROR: Scalarization of scalable vectors is not supported. -; RUN: not --crash llc -global-isel=1 -mtriple=amdgcn -mcpu=gfx900 -filetype=null < %s 2>&1 | FileCheck %s --check-prefix=GISEL -; GISEL: LLVM ERROR: Invalid size request on a scalable vector. - -define void @buffer_store_nxv2i32(ptr addrspace(8) inreg %rsrc, i32 %offset) { - call void @llvm.amdgcn.raw.ptr.buffer.store.nxv2i32( poison, ptr addrspace(8) %rsrc, i32 %offset, i32 0, i32 0) - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-calls.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-calls.ll index 022094bc633c8..6f0d51a027738 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-calls.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-calls.ll @@ -91,12 +91,7 @@ define void @caller(ptr addrspace(7) noundef nonnull %arg) { ; CHECK-NEXT: [[TMP1:%.*]] = shl nuw i160 [[V_INT_RSRC]], 32 ; CHECK-NEXT: [[V_INT_OFF:%.*]] = zext i32 [[V_OFF]] to i160 ; CHECK-NEXT: [[V_INT:%.*]] = or i160 [[TMP1]], [[V_INT_OFF]] -; CHECK-NEXT: [[V_INT_LEGAL:%.*]] = bitcast i160 [[V_INT]] to <5 x i32> -; CHECK-NEXT: [[V_INT_SLICE_0:%.*]] = shufflevector <5 x i32> [[V_INT_LEGAL]], <5 x i32> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[V_INT_SLICE_0]], ptr addrspace(8) align 32 [[ARG_RSRC]], i32 [[ARG_OFF]], i32 0, i32 0) -; CHECK-NEXT: [[ARG_PART_4:%.*]] = add nuw i32 [[ARG_OFF]], 16 -; CHECK-NEXT: [[V_INT_SLICE_4:%.*]] = extractelement <5 x i32> [[V_INT_LEGAL]], i64 4 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[V_INT_SLICE_4]], ptr addrspace(8) align 16 [[ARG_RSRC]], i32 [[ARG_PART_4]], i32 0, i32 0) +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i160(i160 [[V_INT]], ptr addrspace(8) align 32 [[ARG_RSRC]], i32 [[ARG_OFF]], i32 0, i32 0) ; CHECK-NEXT: ret void ; %v = call ptr addrspace(7) @extern(ptr addrspace(7) %arg) diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll index d18f0f8bd1ff9..5b225636b120a 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-contents-legalization.ll @@ -1,17 +1,13 @@ ; NOTE: Assertions have been autogenerated by utils/update_test_checks.py UTC_ARGS: --version 5 ; RUN: opt -S -mcpu=gfx900 -passes=amdgpu-lower-buffer-fat-pointers < %s | FileCheck %s -; Note: if you're adding tests here, also add them to -; buffer-fat-pointers-contents-legalization.ll to make sure the output of this -; transformation can codegen. - target triple = "amdgcn--" ;;; Legal types. These are natively supported, no casts should be performed. -define i8 @load_i8(ptr addrspace(8) inreg %buf) { +define i8 @load_i8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define i8 @load_i8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0:[0-9]+]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0:[0-9]+]] { ; CHECK-NEXT: [[RET:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret i8 [[RET]] ; @@ -20,9 +16,9 @@ define i8 @load_i8(ptr addrspace(8) inreg %buf) { ret i8 %ret } -define void @store_i8(i8 %data, ptr addrspace(8) inreg %buf) { +define void @store_i8(i8 %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_i8( -; CHECK-SAME: i8 [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: i8 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[DATA]], ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -31,9 +27,9 @@ define void @store_i8(i8 %data, ptr addrspace(8) inreg %buf) { ret void } -define i16 @load_i16(ptr addrspace(8) inreg %buf) { +define i16 @load_i16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define i16 @load_i16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret i16 [[RET]] ; @@ -42,9 +38,9 @@ define i16 @load_i16(ptr addrspace(8) inreg %buf) { ret i16 %ret } -define void @store_i16(i16 %data, ptr addrspace(8) inreg %buf) { +define void @store_i16(i16 %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_i16( -; CHECK-SAME: i16 [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: i16 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[DATA]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -53,9 +49,9 @@ define void @store_i16(i16 %data, ptr addrspace(8) inreg %buf) { ret void } -define i32 @load_i32(ptr addrspace(8) inreg %buf) { +define i32 @load_i32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define i32 @load_i32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret i32 [[RET]] ; @@ -64,9 +60,9 @@ define i32 @load_i32(ptr addrspace(8) inreg %buf) { ret i32 %ret } -define void @store_i32(i32 %data, ptr addrspace(8) inreg %buf) { +define void @store_i32(i32 %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_i32( -; CHECK-SAME: i32 [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: i32 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -75,9 +71,9 @@ define void @store_i32(i32 %data, ptr addrspace(8) inreg %buf) { ret void } -define i64 @load_i64(ptr addrspace(8) inreg %buf) { +define i64 @load_i64(ptr addrspace(8) %buf) { ; CHECK-LABEL: define i64 @load_i64( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call i64 @llvm.amdgcn.raw.ptr.buffer.load.i64(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret i64 [[RET]] ; @@ -86,9 +82,9 @@ define i64 @load_i64(ptr addrspace(8) inreg %buf) { ret i64 %ret } -define void @store_i64(i64 %data, ptr addrspace(8) inreg %buf) { +define void @store_i64(i64 %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_i64( -; CHECK-SAME: i64 [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: i64 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i64(i64 [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -97,9 +93,9 @@ define void @store_i64(i64 %data, ptr addrspace(8) inreg %buf) { ret void } -define i128 @load_i128(ptr addrspace(8) inreg %buf) { +define i128 @load_i128(ptr addrspace(8) %buf) { ; CHECK-LABEL: define i128 @load_i128( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call i128 @llvm.amdgcn.raw.ptr.buffer.load.i128(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret i128 [[RET]] ; @@ -108,9 +104,9 @@ define i128 @load_i128(ptr addrspace(8) inreg %buf) { ret i128 %ret } -define void @store_i128(i128 %data, ptr addrspace(8) inreg %buf) { +define void @store_i128(i128 %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_i128( -; CHECK-SAME: i128 [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: i128 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i128(i128 [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -119,11 +115,10 @@ define void @store_i128(i128 %data, ptr addrspace(8) inreg %buf) { ret void } -define <1 x i32> @load_v1i32(ptr addrspace(8) inreg %buf) { +define <1 x i32> @load_v1i32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <1 x i32> @load_v1i32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = bitcast i32 [[RET_LOADABLE]] to <1 x i32> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <1 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v1i32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <1 x i32> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -131,11 +126,10 @@ define <1 x i32> @load_v1i32(ptr addrspace(8) inreg %buf) { ret <1 x i32> %ret } -define void @store_v1i32(<1 x i32> %data, ptr addrspace(8) inreg %buf) { +define void @store_v1i32(<1 x i32> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v1i32( -; CHECK-SAME: <1 x i32> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_STORABLE:%.*]] = bitcast <1 x i32> [[DATA]] to i32 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[DATA_STORABLE]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: <1 x i32> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v1i32(<1 x i32> [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -143,9 +137,9 @@ define void @store_v1i32(<1 x i32> %data, ptr addrspace(8) inreg %buf) { ret void } -define <2 x i32> @load_v2i32(ptr addrspace(8) inreg %buf) { +define <2 x i32> @load_v2i32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <2 x i32> @load_v2i32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <2 x i32> [[RET]] ; @@ -154,9 +148,9 @@ define <2 x i32> @load_v2i32(ptr addrspace(8) inreg %buf) { ret <2 x i32> %ret } -define void @store_v2i32(<2 x i32> %data, ptr addrspace(8) inreg %buf) { +define void @store_v2i32(<2 x i32> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v2i32( -; CHECK-SAME: <2 x i32> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <2 x i32> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -165,9 +159,9 @@ define void @store_v2i32(<2 x i32> %data, ptr addrspace(8) inreg %buf) { ret void } -define <3 x i32> @load_v3i32(ptr addrspace(8) inreg %buf) { +define <3 x i32> @load_v3i32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <3 x i32> @load_v3i32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <3 x i32> [[RET]] ; @@ -176,9 +170,9 @@ define <3 x i32> @load_v3i32(ptr addrspace(8) inreg %buf) { ret <3 x i32> %ret } -define void @store_v3i32(<3 x i32> %data, ptr addrspace(8) inreg %buf) { +define void @store_v3i32(<3 x i32> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v3i32( -; CHECK-SAME: <3 x i32> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <3 x i32> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -187,9 +181,9 @@ define void @store_v3i32(<3 x i32> %data, ptr addrspace(8) inreg %buf) { ret void } -define <4 x i32> @load_v4i32(ptr addrspace(8) inreg %buf) { +define <4 x i32> @load_v4i32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <4 x i32> @load_v4i32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <4 x i32> [[RET]] ; @@ -198,9 +192,9 @@ define <4 x i32> @load_v4i32(ptr addrspace(8) inreg %buf) { ret <4 x i32> %ret } -define void @store_v4i32(<4 x i32> %data, ptr addrspace(8) inreg %buf) { +define void @store_v4i32(<4 x i32> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v4i32( -; CHECK-SAME: <4 x i32> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <4 x i32> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -209,9 +203,9 @@ define void @store_v4i32(<4 x i32> %data, ptr addrspace(8) inreg %buf) { ret void } -define <2 x i16> @load_v2i16(ptr addrspace(8) inreg %buf) { +define <2 x i16> @load_v2i16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <2 x i16> @load_v2i16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <2 x i16> [[RET]] ; @@ -220,9 +214,9 @@ define <2 x i16> @load_v2i16(ptr addrspace(8) inreg %buf) { ret <2 x i16> %ret } -define void @store_v2i16(<2 x i16> %data, ptr addrspace(8) inreg %buf) { +define void @store_v2i16(<2 x i16> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v2i16( -; CHECK-SAME: <2 x i16> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <2 x i16> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i16(<2 x i16> [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -231,9 +225,9 @@ define void @store_v2i16(<2 x i16> %data, ptr addrspace(8) inreg %buf) { ret void } -define <4 x i16> @load_v4i16(ptr addrspace(8) inreg %buf) { +define <4 x i16> @load_v4i16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <4 x i16> @load_v4i16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <4 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v4i16(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <4 x i16> [[RET]] ; @@ -242,9 +236,9 @@ define <4 x i16> @load_v4i16(ptr addrspace(8) inreg %buf) { ret <4 x i16> %ret } -define void @store_v4i16(<4 x i16> %data, ptr addrspace(8) inreg %buf) { +define void @store_v4i16(<4 x i16> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v4i16( -; CHECK-SAME: <4 x i16> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <4 x i16> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i16(<4 x i16> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -253,9 +247,9 @@ define void @store_v4i16(<4 x i16> %data, ptr addrspace(8) inreg %buf) { ret void } -define <8 x i16> @load_v8i16(ptr addrspace(8) inreg %buf) { +define <8 x i16> @load_v8i16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <8 x i16> @load_v8i16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <8 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v8i16(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <8 x i16> [[RET]] ; @@ -264,9 +258,9 @@ define <8 x i16> @load_v8i16(ptr addrspace(8) inreg %buf) { ret <8 x i16> %ret } -define void @store_v8i16(<8 x i16> %data, ptr addrspace(8) inreg %buf) { +define void @store_v8i16(<8 x i16> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v8i16( -; CHECK-SAME: <8 x i16> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <8 x i16> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v8i16(<8 x i16> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -275,9 +269,9 @@ define void @store_v8i16(<8 x i16> %data, ptr addrspace(8) inreg %buf) { ret void } -define <2 x i64> @load_v2i64(ptr addrspace(8) inreg %buf) { +define <2 x i64> @load_v2i64(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <2 x i64> @load_v2i64( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <2 x i64> @llvm.amdgcn.raw.ptr.buffer.load.v2i64(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <2 x i64> [[RET]] ; @@ -286,9 +280,9 @@ define <2 x i64> @load_v2i64(ptr addrspace(8) inreg %buf) { ret <2 x i64> %ret } -define void @store_v2i64(<2 x i64> %data, ptr addrspace(8) inreg %buf) { +define void @store_v2i64(<2 x i64> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v2i64( -; CHECK-SAME: <2 x i64> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <2 x i64> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i64(<2 x i64> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -297,9 +291,9 @@ define void @store_v2i64(<2 x i64> %data, ptr addrspace(8) inreg %buf) { ret void } -define half @load_f16(ptr addrspace(8) inreg %buf) { +define half @load_f16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define half @load_f16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call half @llvm.amdgcn.raw.ptr.buffer.load.f16(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret half [[RET]] ; @@ -308,9 +302,9 @@ define half @load_f16(ptr addrspace(8) inreg %buf) { ret half %ret } -define void @store_f16(half %data, ptr addrspace(8) inreg %buf) { +define void @store_f16(half %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_f16( -; CHECK-SAME: half [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: half [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f16(half [[DATA]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -319,9 +313,9 @@ define void @store_f16(half %data, ptr addrspace(8) inreg %buf) { ret void } -define bfloat @load_bf16(ptr addrspace(8) inreg %buf) { +define bfloat @load_bf16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define bfloat @load_bf16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call bfloat @llvm.amdgcn.raw.ptr.buffer.load.bf16(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret bfloat [[RET]] ; @@ -330,9 +324,9 @@ define bfloat @load_bf16(ptr addrspace(8) inreg %buf) { ret bfloat %ret } -define void @store_bf16(bfloat %data, ptr addrspace(8) inreg %buf) { +define void @store_bf16(bfloat %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_bf16( -; CHECK-SAME: bfloat [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: bfloat [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.bf16(bfloat [[DATA]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -341,9 +335,9 @@ define void @store_bf16(bfloat %data, ptr addrspace(8) inreg %buf) { ret void } -define <2 x half> @load_v2f16(ptr addrspace(8) inreg %buf) { +define <2 x half> @load_v2f16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <2 x half> @load_v2f16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <2 x half> [[RET]] ; @@ -352,9 +346,9 @@ define <2 x half> @load_v2f16(ptr addrspace(8) inreg %buf) { ret <2 x half> %ret } -define void @store_v2f16(<2 x half> %data, ptr addrspace(8) inreg %buf) { +define void @store_v2f16(<2 x half> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v2f16( -; CHECK-SAME: <2 x half> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <2 x half> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -363,9 +357,9 @@ define void @store_v2f16(<2 x half> %data, ptr addrspace(8) inreg %buf) { ret void } -define <4 x bfloat> @load_v4bf16(ptr addrspace(8) inreg %buf) { +define <4 x bfloat> @load_v4bf16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <4 x bfloat> @load_v4bf16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <4 x bfloat> @llvm.amdgcn.raw.ptr.buffer.load.v4bf16(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <4 x bfloat> [[RET]] ; @@ -374,9 +368,9 @@ define <4 x bfloat> @load_v4bf16(ptr addrspace(8) inreg %buf) { ret <4 x bfloat> %ret } -define void @store_v4bf16(<4 x bfloat> %data, ptr addrspace(8) inreg %buf) { +define void @store_v4bf16(<4 x bfloat> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v4bf16( -; CHECK-SAME: <4 x bfloat> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <4 x bfloat> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4bf16(<4 x bfloat> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -385,9 +379,9 @@ define void @store_v4bf16(<4 x bfloat> %data, ptr addrspace(8) inreg %buf) { ret void } -define <8 x half> @load_v8f16(ptr addrspace(8) inreg %buf) { +define <8 x half> @load_v8f16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <8 x half> @load_v8f16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <8 x half> @llvm.amdgcn.raw.ptr.buffer.load.v8f16(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <8 x half> [[RET]] ; @@ -396,9 +390,9 @@ define <8 x half> @load_v8f16(ptr addrspace(8) inreg %buf) { ret <8 x half> %ret } -define void @store_v8f16(<8 x half> %data, ptr addrspace(8) inreg %buf) { +define void @store_v8f16(<8 x half> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v8f16( -; CHECK-SAME: <8 x half> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <8 x half> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v8f16(<8 x half> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -407,9 +401,9 @@ define void @store_v8f16(<8 x half> %data, ptr addrspace(8) inreg %buf) { ret void } -define float @load_f32(ptr addrspace(8) inreg %buf) { +define float @load_f32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define float @load_f32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret float [[RET]] ; @@ -418,9 +412,9 @@ define float @load_f32(ptr addrspace(8) inreg %buf) { ret float %ret } -define void @store_f32(float %data, ptr addrspace(8) inreg %buf) { +define void @store_f32(float %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_f32( -; CHECK-SAME: float [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: float [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -429,9 +423,9 @@ define void @store_f32(float %data, ptr addrspace(8) inreg %buf) { ret void } -define <2 x float> @load_v2f32(ptr addrspace(8) inreg %buf) { +define <2 x float> @load_v2f32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <2 x float> @load_v2f32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <2 x float> [[RET]] ; @@ -440,9 +434,9 @@ define <2 x float> @load_v2f32(ptr addrspace(8) inreg %buf) { ret <2 x float> %ret } -define void @store_v2f32(<2 x float> %data, ptr addrspace(8) inreg %buf) { +define void @store_v2f32(<2 x float> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v2f32( -; CHECK-SAME: <2 x float> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <2 x float> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -451,9 +445,9 @@ define void @store_v2f32(<2 x float> %data, ptr addrspace(8) inreg %buf) { ret void } -define <3 x float> @load_v3f32(ptr addrspace(8) inreg %buf) { +define <3 x float> @load_v3f32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <3 x float> @load_v3f32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <3 x float> [[RET]] ; @@ -462,9 +456,9 @@ define <3 x float> @load_v3f32(ptr addrspace(8) inreg %buf) { ret <3 x float> %ret } -define void @store_v3f32(<3 x float> %data, ptr addrspace(8) inreg %buf) { +define void @store_v3f32(<3 x float> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v3f32( -; CHECK-SAME: <3 x float> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <3 x float> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -473,9 +467,9 @@ define void @store_v3f32(<3 x float> %data, ptr addrspace(8) inreg %buf) { ret void } -define <4 x float> @load_v4f32(ptr addrspace(8) inreg %buf) { +define <4 x float> @load_v4f32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <4 x float> @load_v4f32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <4 x float> [[RET]] ; @@ -484,9 +478,9 @@ define <4 x float> @load_v4f32(ptr addrspace(8) inreg %buf) { ret <4 x float> %ret } -define void @store_v4f32(<4 x float> %data, ptr addrspace(8) inreg %buf) { +define void @store_v4f32(<4 x float> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v4f32( -; CHECK-SAME: <4 x float> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <4 x float> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -495,9 +489,9 @@ define void @store_v4f32(<4 x float> %data, ptr addrspace(8) inreg %buf) { ret void } -define ptr addrspace(0) @load_p0(ptr addrspace(8) inreg %buf) { +define ptr addrspace(0) @load_p0(ptr addrspace(8) %buf) { ; CHECK-LABEL: define ptr @load_p0( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call ptr @llvm.amdgcn.raw.ptr.buffer.load.p0(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret ptr [[RET]] ; @@ -506,9 +500,9 @@ define ptr addrspace(0) @load_p0(ptr addrspace(8) inreg %buf) { ret ptr addrspace(0) %ret } -define void @store_p0(ptr addrspace(0) %data, ptr addrspace(8) inreg %buf) { +define void @store_p0(ptr addrspace(0) %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_p0( -; CHECK-SAME: ptr [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.p0(ptr [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -517,9 +511,9 @@ define void @store_p0(ptr addrspace(0) %data, ptr addrspace(8) inreg %buf) { ret void } -define ptr addrspace(1) @load_p1(ptr addrspace(8) inreg %buf) { +define ptr addrspace(1) @load_p1(ptr addrspace(8) %buf) { ; CHECK-LABEL: define ptr addrspace(1) @load_p1( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call ptr addrspace(1) @llvm.amdgcn.raw.ptr.buffer.load.p1(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret ptr addrspace(1) [[RET]] ; @@ -528,9 +522,9 @@ define ptr addrspace(1) @load_p1(ptr addrspace(8) inreg %buf) { ret ptr addrspace(1) %ret } -define void @store_p1(ptr addrspace(1) %data, ptr addrspace(8) inreg %buf) { +define void @store_p1(ptr addrspace(1) %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_p1( -; CHECK-SAME: ptr addrspace(1) [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(1) [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.p1(ptr addrspace(1) [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -539,9 +533,9 @@ define void @store_p1(ptr addrspace(1) %data, ptr addrspace(8) inreg %buf) { ret void } -define ptr addrspace(2) @load_p2(ptr addrspace(8) inreg %buf) { +define ptr addrspace(2) @load_p2(ptr addrspace(8) %buf) { ; CHECK-LABEL: define ptr addrspace(2) @load_p2( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call ptr addrspace(2) @llvm.amdgcn.raw.ptr.buffer.load.p2(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret ptr addrspace(2) [[RET]] ; @@ -550,9 +544,9 @@ define ptr addrspace(2) @load_p2(ptr addrspace(8) inreg %buf) { ret ptr addrspace(2) %ret } -define void @store_p2(ptr addrspace(2) %data, ptr addrspace(8) inreg %buf) { +define void @store_p2(ptr addrspace(2) %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_p2( -; CHECK-SAME: ptr addrspace(2) [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(2) [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.p2(ptr addrspace(2) [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -561,9 +555,9 @@ define void @store_p2(ptr addrspace(2) %data, ptr addrspace(8) inreg %buf) { ret void } -define ptr addrspace(3) @load_p3(ptr addrspace(8) inreg %buf) { +define ptr addrspace(3) @load_p3(ptr addrspace(8) %buf) { ; CHECK-LABEL: define ptr addrspace(3) @load_p3( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call ptr addrspace(3) @llvm.amdgcn.raw.ptr.buffer.load.p3(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret ptr addrspace(3) [[RET]] ; @@ -572,9 +566,9 @@ define ptr addrspace(3) @load_p3(ptr addrspace(8) inreg %buf) { ret ptr addrspace(3) %ret } -define void @store_p3(ptr addrspace(3) %data, ptr addrspace(8) inreg %buf) { +define void @store_p3(ptr addrspace(3) %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_p3( -; CHECK-SAME: ptr addrspace(3) [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(3) [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.p3(ptr addrspace(3) [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -583,9 +577,9 @@ define void @store_p3(ptr addrspace(3) %data, ptr addrspace(8) inreg %buf) { ret void } -define ptr addrspace(4) @load_p4(ptr addrspace(8) inreg %buf) { +define ptr addrspace(4) @load_p4(ptr addrspace(8) %buf) { ; CHECK-LABEL: define ptr addrspace(4) @load_p4( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call ptr addrspace(4) @llvm.amdgcn.raw.ptr.buffer.load.p4(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret ptr addrspace(4) [[RET]] ; @@ -594,9 +588,9 @@ define ptr addrspace(4) @load_p4(ptr addrspace(8) inreg %buf) { ret ptr addrspace(4) %ret } -define void @store_p4(ptr addrspace(4) %data, ptr addrspace(8) inreg %buf) { +define void @store_p4(ptr addrspace(4) %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_p4( -; CHECK-SAME: ptr addrspace(4) [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(4) [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.p4(ptr addrspace(4) [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -605,9 +599,9 @@ define void @store_p4(ptr addrspace(4) %data, ptr addrspace(8) inreg %buf) { ret void } -define ptr addrspace(5) @load_p5(ptr addrspace(8) inreg %buf) { +define ptr addrspace(5) @load_p5(ptr addrspace(8) %buf) { ; CHECK-LABEL: define ptr addrspace(5) @load_p5( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call ptr addrspace(5) @llvm.amdgcn.raw.ptr.buffer.load.p5(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret ptr addrspace(5) [[RET]] ; @@ -616,9 +610,9 @@ define ptr addrspace(5) @load_p5(ptr addrspace(8) inreg %buf) { ret ptr addrspace(5) %ret } -define void @store_p5(ptr addrspace(5) %data, ptr addrspace(8) inreg %buf) { +define void @store_p5(ptr addrspace(5) %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_p5( -; CHECK-SAME: ptr addrspace(5) [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(5) [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.p5(ptr addrspace(5) [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -627,9 +621,9 @@ define void @store_p5(ptr addrspace(5) %data, ptr addrspace(8) inreg %buf) { ret void } -define ptr addrspace(6) @load_p6(ptr addrspace(8) inreg %buf) { +define ptr addrspace(6) @load_p6(ptr addrspace(8) %buf) { ; CHECK-LABEL: define ptr addrspace(6) @load_p6( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call ptr addrspace(6) @llvm.amdgcn.raw.ptr.buffer.load.p6(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret ptr addrspace(6) [[RET]] ; @@ -638,9 +632,9 @@ define ptr addrspace(6) @load_p6(ptr addrspace(8) inreg %buf) { ret ptr addrspace(6) %ret } -define void @store_p6(ptr addrspace(6) %data, ptr addrspace(8) inreg %buf) { +define void @store_p6(ptr addrspace(6) %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_p6( -; CHECK-SAME: ptr addrspace(6) [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(6) [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.p6(ptr addrspace(6) [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -649,9 +643,9 @@ define void @store_p6(ptr addrspace(6) %data, ptr addrspace(8) inreg %buf) { ret void } -define ptr addrspace(8) @load_p8(ptr addrspace(8) inreg %buf) { +define ptr addrspace(8) @load_p8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define ptr addrspace(8) @load_p8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call ptr addrspace(8) @llvm.amdgcn.raw.ptr.buffer.load.p8(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret ptr addrspace(8) [[RET]] ; @@ -660,9 +654,9 @@ define ptr addrspace(8) @load_p8(ptr addrspace(8) inreg %buf) { ret ptr addrspace(8) %ret } -define void @store_p8(ptr addrspace(8) %data, ptr addrspace(8) inreg %buf) { +define void @store_p8(ptr addrspace(8) %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_p8( -; CHECK-SAME: ptr addrspace(8) [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.p8(ptr addrspace(8) [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -671,9 +665,9 @@ define void @store_p8(ptr addrspace(8) %data, ptr addrspace(8) inreg %buf) { ret void } -define <2 x ptr addrspace(1)> @load_v2p1(ptr addrspace(8) inreg %buf) { +define <2 x ptr addrspace(1)> @load_v2p1(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <2 x ptr addrspace(1)> @load_v2p1( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <2 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.v2p1(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <2 x ptr addrspace(1)> [[RET]] ; @@ -682,9 +676,9 @@ define <2 x ptr addrspace(1)> @load_v2p1(ptr addrspace(8) inreg %buf) { ret <2 x ptr addrspace(1)> %ret } -define void @store_v2p1(<2 x ptr addrspace(1)> %data, ptr addrspace(8) inreg %buf) { +define void @store_v2p1(<2 x ptr addrspace(1)> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v2p1( -; CHECK-SAME: <2 x ptr addrspace(1)> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <2 x ptr addrspace(1)> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2p1(<2 x ptr addrspace(1)> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -693,9 +687,9 @@ define void @store_v2p1(<2 x ptr addrspace(1)> %data, ptr addrspace(8) inreg %bu ret void } -define <2 x ptr addrspace(5)> @load_v2p5(ptr addrspace(8) inreg %buf) { +define <2 x ptr addrspace(5)> @load_v2p5(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <2 x ptr addrspace(5)> @load_v2p5( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <2 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v2p5(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <2 x ptr addrspace(5)> [[RET]] ; @@ -704,9 +698,9 @@ define <2 x ptr addrspace(5)> @load_v2p5(ptr addrspace(8) inreg %buf) { ret <2 x ptr addrspace(5)> %ret } -define void @store_v2p5(<2 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf) { +define void @store_v2p5(<2 x ptr addrspace(5)> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v2p5( -; CHECK-SAME: <2 x ptr addrspace(5)> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <2 x ptr addrspace(5)> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2p5(<2 x ptr addrspace(5)> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -715,9 +709,9 @@ define void @store_v2p5(<2 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %bu ret void } -define <3 x ptr addrspace(5)> @load_v3p5(ptr addrspace(8) inreg %buf) { +define <3 x ptr addrspace(5)> @load_v3p5(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <3 x ptr addrspace(5)> @load_v3p5( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <3 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v3p5(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <3 x ptr addrspace(5)> [[RET]] ; @@ -726,9 +720,9 @@ define <3 x ptr addrspace(5)> @load_v3p5(ptr addrspace(8) inreg %buf) { ret <3 x ptr addrspace(5)> %ret } -define void @store_v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf) { +define void @store_v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v3p5( -; CHECK-SAME: <3 x ptr addrspace(5)> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <3 x ptr addrspace(5)> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v3p5(<3 x ptr addrspace(5)> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -737,9 +731,9 @@ define void @store_v3p5(<3 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %bu ret void } -define <4 x ptr addrspace(5)> @load_v4p5(ptr addrspace(8) inreg %buf) { +define <4 x ptr addrspace(5)> @load_v4p5(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <4 x ptr addrspace(5)> @load_v4p5( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[RET:%.*]] = call <4 x ptr addrspace(5)> @llvm.amdgcn.raw.ptr.buffer.load.v4p5(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <4 x ptr addrspace(5)> [[RET]] ; @@ -748,9 +742,9 @@ define <4 x ptr addrspace(5)> @load_v4p5(ptr addrspace(8) inreg %buf) { ret <4 x ptr addrspace(5)> %ret } -define void @store_v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %buf) { +define void @store_v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v4p5( -; CHECK-SAME: <4 x ptr addrspace(5)> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <4 x ptr addrspace(5)> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4p5(<4 x ptr addrspace(5)> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; @@ -761,11 +755,10 @@ define void @store_v4p5(<4 x ptr addrspace(5)> %data, ptr addrspace(8) inreg %bu ;;; 3 words in a short type. These need to be bitcast to <3 x i32> to be supported. -define <6 x half> @load_v6f16(ptr addrspace(8) inreg %buf) { +define <6 x half> @load_v6f16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <6 x half> @load_v6f16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = bitcast <3 x i32> [[RET_LOADABLE]] to <6 x half> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <6 x half> @llvm.amdgcn.raw.ptr.buffer.load.v6f16(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <6 x half> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -773,11 +766,10 @@ define <6 x half> @load_v6f16(ptr addrspace(8) inreg %buf) { ret <6 x half> %ret } -define void @store_v6f16(<6 x half> %data, ptr addrspace(8) inreg %buf) { +define void @store_v6f16(<6 x half> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v6f16( -; CHECK-SAME: <6 x half> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_STORABLE:%.*]] = bitcast <6 x half> [[DATA]] to <3 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> [[DATA_STORABLE]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: <6 x half> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v6f16(<6 x half> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -787,14 +779,10 @@ define void @store_v6f16(<6 x half> %data, ptr addrspace(8) inreg %buf) { ;;; Long types (32 bit elements). Must be split into multiple operations. -define <5 x float> @load_v5f32(ptr addrspace(8) inreg %buf) { +define <5 x float> @load_v5f32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <5 x float> @load_v5f32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <4 x float> [[RET_OFF_0]], <4 x float> poison, <5 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <5 x float> poison, <5 x float> [[RET_EXT_0]], <5 x i32> -; CHECK-NEXT: [[RET_OFF_16:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = insertelement <5 x float> [[RET_PARTS_0]], float [[RET_OFF_16]], i64 4 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <5 x float> @llvm.amdgcn.raw.ptr.buffer.load.v5f32(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <5 x float> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -802,13 +790,10 @@ define <5 x float> @load_v5f32(ptr addrspace(8) inreg %buf) { ret <5 x float> %ret } -define void @store_v5f32(<5 x float> %data, ptr addrspace(8) inreg %buf) { +define void @store_v5f32(<5 x float> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v5f32( -; CHECK-SAME: <5 x float> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <5 x float> [[DATA]], <5 x float> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[DATA_SLICE_0]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = extractelement <5 x float> [[DATA]], i64 4 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[DATA_SLICE_4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) +; CHECK-SAME: <5 x float> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v5f32(<5 x float> [[DATA]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -816,15 +801,10 @@ define void @store_v5f32(<5 x float> %data, ptr addrspace(8) inreg %buf) { ret void } -define <6 x float> @load_v6f32(ptr addrspace(8) inreg %buf) { +define <6 x float> @load_v6f32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <6 x float> @load_v6f32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <4 x float> [[RET_OFF_0]], <4 x float> poison, <6 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <6 x float> poison, <6 x float> [[RET_EXT_0]], <6 x i32> -; CHECK-NEXT: [[RET_OFF_16:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_4:%.*]] = shufflevector <2 x float> [[RET_OFF_16]], <2 x float> poison, <6 x i32> -; CHECK-NEXT: [[RET:%.*]] = shufflevector <6 x float> [[RET_PARTS_0]], <6 x float> [[RET_EXT_4]], <6 x i32> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <6 x float> @llvm.amdgcn.raw.ptr.buffer.load.v6f32(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <6 x float> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -832,13 +812,10 @@ define <6 x float> @load_v6f32(ptr addrspace(8) inreg %buf) { ret <6 x float> %ret } -define void @store_v6f32(<6 x float> %data, ptr addrspace(8) inreg %buf) { +define void @store_v6f32(<6 x float> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v6f32( -; CHECK-SAME: <6 x float> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <6 x float> [[DATA]], <6 x float> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[DATA_SLICE_0]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = shufflevector <6 x float> [[DATA]], <6 x float> poison, <2 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> [[DATA_SLICE_4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) +; CHECK-SAME: <6 x float> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v6f32(<6 x float> [[DATA]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -846,15 +823,10 @@ define void @store_v6f32(<6 x float> %data, ptr addrspace(8) inreg %buf) { ret void } -define <7 x float> @load_v7f32(ptr addrspace(8) inreg %buf) { +define <7 x float> @load_v7f32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <7 x float> @load_v7f32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <4 x float> [[RET_OFF_0]], <4 x float> poison, <7 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <7 x float> poison, <7 x float> [[RET_EXT_0]], <7 x i32> -; CHECK-NEXT: [[RET_OFF_16:%.*]] = call <3 x float> @llvm.amdgcn.raw.ptr.buffer.load.v3f32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_4:%.*]] = shufflevector <3 x float> [[RET_OFF_16]], <3 x float> poison, <7 x i32> -; CHECK-NEXT: [[RET:%.*]] = shufflevector <7 x float> [[RET_PARTS_0]], <7 x float> [[RET_EXT_4]], <7 x i32> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <7 x float> @llvm.amdgcn.raw.ptr.buffer.load.v7f32(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <7 x float> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -862,13 +834,10 @@ define <7 x float> @load_v7f32(ptr addrspace(8) inreg %buf) { ret <7 x float> %ret } -define void @store_v7f32(<7 x float> %data, ptr addrspace(8) inreg %buf) { +define void @store_v7f32(<7 x float> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v7f32( -; CHECK-SAME: <7 x float> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <7 x float> [[DATA]], <7 x float> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[DATA_SLICE_0]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = shufflevector <7 x float> [[DATA]], <7 x float> poison, <3 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v3f32(<3 x float> [[DATA_SLICE_4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) +; CHECK-SAME: <7 x float> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v7f32(<7 x float> [[DATA]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -876,15 +845,10 @@ define void @store_v7f32(<7 x float> %data, ptr addrspace(8) inreg %buf) { ret void } -define <8 x float> @load_v8f32(ptr addrspace(8) inreg %buf) { +define <8 x float> @load_v8f32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <8 x float> @load_v8f32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <4 x float> [[RET_OFF_0]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <8 x float> poison, <8 x float> [[RET_EXT_0]], <8 x i32> -; CHECK-NEXT: [[RET_OFF_16:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_4:%.*]] = shufflevector <4 x float> [[RET_OFF_16]], <4 x float> poison, <8 x i32> -; CHECK-NEXT: [[RET:%.*]] = shufflevector <8 x float> [[RET_PARTS_0]], <8 x float> [[RET_EXT_4]], <8 x i32> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <8 x float> @llvm.amdgcn.raw.ptr.buffer.load.v8f32(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <8 x float> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -892,13 +856,10 @@ define <8 x float> @load_v8f32(ptr addrspace(8) inreg %buf) { ret <8 x float> %ret } -define void @store_v8f32(<8 x float> %data, ptr addrspace(8) inreg %buf) { +define void @store_v8f32(<8 x float> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v8f32( -; CHECK-SAME: <8 x float> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <8 x float> [[DATA]], <8 x float> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[DATA_SLICE_0]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = shufflevector <8 x float> [[DATA]], <8 x float> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[DATA_SLICE_4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) +; CHECK-SAME: <8 x float> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v8f32(<8 x float> [[DATA]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -906,18 +867,10 @@ define void @store_v8f32(<8 x float> %data, ptr addrspace(8) inreg %buf) { ret void } -define <10 x float> @load_v10f32(ptr addrspace(8) inreg %buf) { +define <10 x float> @load_v10f32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <10 x float> @load_v10f32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 64 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <4 x float> [[RET_OFF_0]], <4 x float> poison, <10 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <10 x float> poison, <10 x float> [[RET_EXT_0]], <10 x i32> -; CHECK-NEXT: [[RET_OFF_16:%.*]] = call <4 x float> @llvm.amdgcn.raw.ptr.buffer.load.v4f32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_4:%.*]] = shufflevector <4 x float> [[RET_OFF_16]], <4 x float> poison, <10 x i32> -; CHECK-NEXT: [[RET_PARTS_4:%.*]] = shufflevector <10 x float> [[RET_PARTS_0]], <10 x float> [[RET_EXT_4]], <10 x i32> -; CHECK-NEXT: [[RET_OFF_32:%.*]] = call <2 x float> @llvm.amdgcn.raw.ptr.buffer.load.v2f32(ptr addrspace(8) align 32 [[BUF]], i32 32, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_8:%.*]] = shufflevector <2 x float> [[RET_OFF_32]], <2 x float> poison, <10 x i32> -; CHECK-NEXT: [[RET:%.*]] = shufflevector <10 x float> [[RET_PARTS_4]], <10 x float> [[RET_EXT_8]], <10 x i32> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <10 x float> @llvm.amdgcn.raw.ptr.buffer.load.v10f32(ptr addrspace(8) align 64 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <10 x float> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -925,15 +878,10 @@ define <10 x float> @load_v10f32(ptr addrspace(8) inreg %buf) { ret <10 x float> %ret } -define void @store_v10f32(<10 x float> %data, ptr addrspace(8) inreg %buf) { +define void @store_v10f32(<10 x float> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v10f32( -; CHECK-SAME: <10 x float> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <10 x float> [[DATA]], <10 x float> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[DATA_SLICE_0]], ptr addrspace(8) align 64 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = shufflevector <10 x float> [[DATA]], <10 x float> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4f32(<4 x float> [[DATA_SLICE_4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_8:%.*]] = shufflevector <10 x float> [[DATA]], <10 x float> poison, <2 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2f32(<2 x float> [[DATA_SLICE_8]], ptr addrspace(8) align 32 [[BUF]], i32 32, i32 0, i32 0) +; CHECK-SAME: <10 x float> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v10f32(<10 x float> [[DATA]], ptr addrspace(8) align 64 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -941,15 +889,10 @@ define void @store_v10f32(<10 x float> %data, ptr addrspace(8) inreg %buf) { ret void } -define <6 x i32> @load_v6i32(ptr addrspace(8) inreg %buf) { +define <6 x i32> @load_v6i32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <6 x i32> @load_v6i32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <4 x i32> [[RET_OFF_0]], <4 x i32> poison, <6 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <6 x i32> poison, <6 x i32> [[RET_EXT_0]], <6 x i32> -; CHECK-NEXT: [[RET_OFF_16:%.*]] = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_4:%.*]] = shufflevector <2 x i32> [[RET_OFF_16]], <2 x i32> poison, <6 x i32> -; CHECK-NEXT: [[RET:%.*]] = shufflevector <6 x i32> [[RET_PARTS_0]], <6 x i32> [[RET_EXT_4]], <6 x i32> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <6 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v6i32(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <6 x i32> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -957,13 +900,10 @@ define <6 x i32> @load_v6i32(ptr addrspace(8) inreg %buf) { ret <6 x i32> %ret } -define void @store_v6i32(<6 x i32> %data, ptr addrspace(8) inreg %buf) { +define void @store_v6i32(<6 x i32> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v6i32( -; CHECK-SAME: <6 x i32> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <6 x i32> [[DATA]], <6 x i32> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DATA_SLICE_0]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = shufflevector <6 x i32> [[DATA]], <6 x i32> poison, <2 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> [[DATA_SLICE_4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) +; CHECK-SAME: <6 x i32> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v6i32(<6 x i32> [[DATA]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -971,15 +911,10 @@ define void @store_v6i32(<6 x i32> %data, ptr addrspace(8) inreg %buf) { ret void } -define <4 x ptr addrspace(1)> @load_v4p1(ptr addrspace(8) inreg %buf) { +define <4 x ptr addrspace(1)> @load_v4p1(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <4 x ptr addrspace(1)> @load_v4p1( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <2 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.v2p1(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <2 x ptr addrspace(1)> [[RET_OFF_0]], <2 x ptr addrspace(1)> poison, <4 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <4 x ptr addrspace(1)> poison, <4 x ptr addrspace(1)> [[RET_EXT_0]], <4 x i32> -; CHECK-NEXT: [[RET_OFF_16:%.*]] = call <2 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.v2p1(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_2:%.*]] = shufflevector <2 x ptr addrspace(1)> [[RET_OFF_16]], <2 x ptr addrspace(1)> poison, <4 x i32> -; CHECK-NEXT: [[RET:%.*]] = shufflevector <4 x ptr addrspace(1)> [[RET_PARTS_0]], <4 x ptr addrspace(1)> [[RET_EXT_2]], <4 x i32> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <4 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.v4p1(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <4 x ptr addrspace(1)> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -987,13 +922,10 @@ define <4 x ptr addrspace(1)> @load_v4p1(ptr addrspace(8) inreg %buf) { ret <4 x ptr addrspace(1)> %ret } -define void @store_v4p1(<4 x ptr addrspace(1)> %data, ptr addrspace(8) inreg %buf) { +define void @store_v4p1(<4 x ptr addrspace(1)> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v4p1( -; CHECK-SAME: <4 x ptr addrspace(1)> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <4 x ptr addrspace(1)> [[DATA]], <4 x ptr addrspace(1)> poison, <2 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2p1(<2 x ptr addrspace(1)> [[DATA_SLICE_0]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_2:%.*]] = shufflevector <4 x ptr addrspace(1)> [[DATA]], <4 x ptr addrspace(1)> poison, <2 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2p1(<2 x ptr addrspace(1)> [[DATA_SLICE_2]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) +; CHECK-SAME: <4 x ptr addrspace(1)> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4p1(<4 x ptr addrspace(1)> [[DATA]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1003,11 +935,10 @@ define void @store_v4p1(<4 x ptr addrspace(1)> %data, ptr addrspace(8) inreg %bu ;;; Uneven types with 16-bit elements. Require splitting into multiple operations. -define <1 x i16> @load_v1i16(ptr addrspace(8) inreg %buf) { +define <1 x i16> @load_v1i16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <1 x i16> @load_v1i16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = bitcast i16 [[RET_LOADABLE]] to <1 x i16> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <1 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v1i16(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <1 x i16> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1015,11 +946,10 @@ define <1 x i16> @load_v1i16(ptr addrspace(8) inreg %buf) { ret <1 x i16> %ret } -define void @store_v1i16(<1 x i16> %data, ptr addrspace(8) inreg %buf) { +define void @store_v1i16(<1 x i16> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v1i16( -; CHECK-SAME: <1 x i16> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_STORABLE:%.*]] = bitcast <1 x i16> [[DATA]] to i16 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[DATA_STORABLE]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: <1 x i16> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v1i16(<1 x i16> [[DATA]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1027,14 +957,10 @@ define void @store_v1i16(<1 x i16> %data, ptr addrspace(8) inreg %buf) { ret void } -define <3 x i16> @load_v3i16(ptr addrspace(8) inreg %buf) { +define <3 x i16> @load_v3i16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <3 x i16> @load_v3i16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <2 x i16> [[RET_OFF_0]], <2 x i16> poison, <3 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <3 x i16> poison, <3 x i16> [[RET_EXT_0]], <3 x i32> -; CHECK-NEXT: [[RET_OFF_4:%.*]] = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) align 4 [[BUF]], i32 4, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = insertelement <3 x i16> [[RET_PARTS_0]], i16 [[RET_OFF_4]], i64 2 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <3 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v3i16(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <3 x i16> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1042,13 +968,10 @@ define <3 x i16> @load_v3i16(ptr addrspace(8) inreg %buf) { ret <3 x i16> %ret } -define void @store_v3i16(<3 x i16> %data, ptr addrspace(8) inreg %buf) { +define void @store_v3i16(<3 x i16> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v3i16( -; CHECK-SAME: <3 x i16> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <3 x i16> [[DATA]], <3 x i16> poison, <2 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i16(<2 x i16> [[DATA_SLICE_0]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_2:%.*]] = extractelement <3 x i16> [[DATA]], i64 2 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[DATA_SLICE_2]], ptr addrspace(8) align 4 [[BUF]], i32 4, i32 0, i32 0) +; CHECK-SAME: <3 x i16> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i16(<3 x i16> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1056,14 +979,10 @@ define void @store_v3i16(<3 x i16> %data, ptr addrspace(8) inreg %buf) { ret void } -define <5 x i16> @load_v5i16(ptr addrspace(8) inreg %buf) { +define <5 x i16> @load_v5i16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <5 x i16> @load_v5i16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <4 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v4i16(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <4 x i16> [[RET_OFF_0]], <4 x i16> poison, <5 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <5 x i16> poison, <5 x i16> [[RET_EXT_0]], <5 x i32> -; CHECK-NEXT: [[RET_OFF_8:%.*]] = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) align 8 [[BUF]], i32 8, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = insertelement <5 x i16> [[RET_PARTS_0]], i16 [[RET_OFF_8]], i64 4 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <5 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v5i16(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <5 x i16> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1071,13 +990,10 @@ define <5 x i16> @load_v5i16(ptr addrspace(8) inreg %buf) { ret <5 x i16> %ret } -define void @store_v5i16(<5 x i16> %data, ptr addrspace(8) inreg %buf) { +define void @store_v5i16(<5 x i16> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v5i16( -; CHECK-SAME: <5 x i16> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <5 x i16> [[DATA]], <5 x i16> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i16(<4 x i16> [[DATA_SLICE_0]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = extractelement <5 x i16> [[DATA]], i64 4 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[DATA_SLICE_4]], ptr addrspace(8) align 8 [[BUF]], i32 8, i32 0, i32 0) +; CHECK-SAME: <5 x i16> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v5i16(<5 x i16> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1085,11 +1001,10 @@ define void @store_v5i16(<5 x i16> %data, ptr addrspace(8) inreg %buf) { ret void } -define <6 x i16> @load_v6i16(ptr addrspace(8) inreg %buf) { +define <6 x i16> @load_v6i16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <6 x i16> @load_v6i16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = bitcast <3 x i32> [[RET_LOADABLE]] to <6 x i16> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <6 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v6i16(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <6 x i16> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1097,11 +1012,10 @@ define <6 x i16> @load_v6i16(ptr addrspace(8) inreg %buf) { ret <6 x i16> %ret } -define void @store_v6i16(<6 x i16> %data, ptr addrspace(8) inreg %buf) { +define void @store_v6i16(<6 x i16> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v6i16( -; CHECK-SAME: <6 x i16> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_STORABLE:%.*]] = bitcast <6 x i16> [[DATA]] to <3 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> [[DATA_STORABLE]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: <6 x i16> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v6i16(<6 x i16> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1109,15 +1023,10 @@ define void @store_v6i16(<6 x i16> %data, ptr addrspace(8) inreg %buf) { ret void } -define <7 x i16> @load_v7i16(ptr addrspace(8) inreg %buf) { +define <7 x i16> @load_v7i16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <7 x i16> @load_v7i16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_OFF_0_FROM_LOADABLE:%.*]] = bitcast <3 x i32> [[RET_OFF_0]] to <6 x i16> -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <6 x i16> [[RET_OFF_0_FROM_LOADABLE]], <6 x i16> poison, <7 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <7 x i16> poison, <7 x i16> [[RET_EXT_0]], <7 x i32> -; CHECK-NEXT: [[RET_OFF_12:%.*]] = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) align 4 [[BUF]], i32 12, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = insertelement <7 x i16> [[RET_PARTS_0]], i16 [[RET_OFF_12]], i64 6 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <7 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v7i16(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <7 x i16> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1125,14 +1034,10 @@ define <7 x i16> @load_v7i16(ptr addrspace(8) inreg %buf) { ret <7 x i16> %ret } -define void @store_v7i16(<7 x i16> %data, ptr addrspace(8) inreg %buf) { +define void @store_v7i16(<7 x i16> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v7i16( -; CHECK-SAME: <7 x i16> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <7 x i16> [[DATA]], <7 x i16> poison, <6 x i32> -; CHECK-NEXT: [[DATA_SLICE_0_STORABLE:%.*]] = bitcast <6 x i16> [[DATA_SLICE_0]] to <3 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> [[DATA_SLICE_0_STORABLE]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_6:%.*]] = extractelement <7 x i16> [[DATA]], i64 6 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[DATA_SLICE_6]], ptr addrspace(8) align 4 [[BUF]], i32 12, i32 0, i32 0) +; CHECK-SAME: <7 x i16> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v7i16(<7 x i16> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1140,14 +1045,10 @@ define void @store_v7i16(<7 x i16> %data, ptr addrspace(8) inreg %buf) { ret void } -define <9 x i16> @load_v9i16(ptr addrspace(8) inreg %buf) { +define <9 x i16> @load_v9i16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <9 x i16> @load_v9i16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <8 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v8i16(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <8 x i16> [[RET_OFF_0]], <8 x i16> poison, <9 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <9 x i16> poison, <9 x i16> [[RET_EXT_0]], <9 x i32> -; CHECK-NEXT: [[RET_OFF_16:%.*]] = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = insertelement <9 x i16> [[RET_PARTS_0]], i16 [[RET_OFF_16]], i64 8 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <9 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v9i16(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <9 x i16> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1155,13 +1056,10 @@ define <9 x i16> @load_v9i16(ptr addrspace(8) inreg %buf) { ret <9 x i16> %ret } -define void @store_v9i16(<9 x i16> %data, ptr addrspace(8) inreg %buf) { +define void @store_v9i16(<9 x i16> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v9i16( -; CHECK-SAME: <9 x i16> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <9 x i16> [[DATA]], <9 x i16> poison, <8 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v8i16(<8 x i16> [[DATA_SLICE_0]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_8:%.*]] = extractelement <9 x i16> [[DATA]], i64 8 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[DATA_SLICE_8]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) +; CHECK-SAME: <9 x i16> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v9i16(<9 x i16> [[DATA]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1173,11 +1071,10 @@ define void @store_v9i16(<9 x i16> %data, ptr addrspace(8) inreg %buf) { ;;; - Split into multiple operations ;;; - Bitcast if they have a natively supported width -define <1 x i8> @load_v1i8(ptr addrspace(8) inreg %buf) { +define <1 x i8> @load_v1i8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <1 x i8> @load_v1i8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = bitcast i8 [[RET_LOADABLE]] to <1 x i8> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <1 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v1i8(ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <1 x i8> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1185,11 +1082,10 @@ define <1 x i8> @load_v1i8(ptr addrspace(8) inreg %buf) { ret <1 x i8> %ret } -define void @store_v1i8(<1 x i8> %data, ptr addrspace(8) inreg %buf) { +define void @store_v1i8(<1 x i8> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v1i8( -; CHECK-SAME: <1 x i8> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <1 x i8> [[DATA]] to i8 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[DATA_LEGAL]], ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: <1 x i8> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v1i8(<1 x i8> [[DATA]], ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1197,11 +1093,10 @@ define void @store_v1i8(<1 x i8> %data, ptr addrspace(8) inreg %buf) { ret void } -define <2 x i8> @load_v2i8(ptr addrspace(8) inreg %buf) { +define <2 x i8> @load_v2i8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <2 x i8> @load_v2i8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = bitcast i16 [[RET_LOADABLE]] to <2 x i8> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <2 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v2i8(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <2 x i8> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1209,11 +1104,10 @@ define <2 x i8> @load_v2i8(ptr addrspace(8) inreg %buf) { ret <2 x i8> %ret } -define void @store_v2i8(<2 x i8> %data, ptr addrspace(8) inreg %buf) { +define void @store_v2i8(<2 x i8> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v2i8( -; CHECK-SAME: <2 x i8> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <2 x i8> [[DATA]] to i16 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[DATA_LEGAL]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: <2 x i8> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i8(<2 x i8> [[DATA]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1221,15 +1115,10 @@ define void @store_v2i8(<2 x i8> %data, ptr addrspace(8) inreg %buf) { ret void } -define <3 x i8> @load_v3i8(ptr addrspace(8) inreg %buf) { +define <3 x i8> @load_v3i8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <3 x i8> @load_v3i8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_OFF_0_FROM_LOADABLE:%.*]] = bitcast i16 [[RET_OFF_0]] to <2 x i8> -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <2 x i8> [[RET_OFF_0_FROM_LOADABLE]], <2 x i8> poison, <3 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <3 x i8> poison, <3 x i8> [[RET_EXT_0]], <3 x i32> -; CHECK-NEXT: [[RET_OFF_2:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) align 2 [[BUF]], i32 2, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = insertelement <3 x i8> [[RET_PARTS_0]], i8 [[RET_OFF_2]], i64 2 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <3 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v3i8(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <3 x i8> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1237,14 +1126,10 @@ define <3 x i8> @load_v3i8(ptr addrspace(8) inreg %buf) { ret <3 x i8> %ret } -define void @store_v3i8(<3 x i8> %data, ptr addrspace(8) inreg %buf) { +define void @store_v3i8(<3 x i8> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v3i8( -; CHECK-SAME: <3 x i8> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <3 x i8> [[DATA]], <3 x i8> poison, <2 x i32> -; CHECK-NEXT: [[DATA_SLICE_0_STORABLE:%.*]] = bitcast <2 x i8> [[DATA_SLICE_0]] to i16 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[DATA_SLICE_0_STORABLE]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_2:%.*]] = extractelement <3 x i8> [[DATA]], i64 2 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[DATA_SLICE_2]], ptr addrspace(8) align 2 [[BUF]], i32 2, i32 0, i32 0) +; CHECK-SAME: <3 x i8> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i8(<3 x i8> [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1252,11 +1137,10 @@ define void @store_v3i8(<3 x i8> %data, ptr addrspace(8) inreg %buf) { ret void } -define <4 x i8> @load_v4i8(ptr addrspace(8) inreg %buf) { +define <4 x i8> @load_v4i8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <4 x i8> @load_v4i8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = bitcast i32 [[RET_LOADABLE]] to <4 x i8> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <4 x i8> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1264,11 +1148,10 @@ define <4 x i8> @load_v4i8(ptr addrspace(8) inreg %buf) { ret <4 x i8> %ret } -define void @store_v4i8(<4 x i8> %data, ptr addrspace(8) inreg %buf) { +define void @store_v4i8(<4 x i8> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v4i8( -; CHECK-SAME: <4 x i8> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <4 x i8> [[DATA]] to i32 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[DATA_LEGAL]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: <4 x i8> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i8(<4 x i8> [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1276,15 +1159,10 @@ define void @store_v4i8(<4 x i8> %data, ptr addrspace(8) inreg %buf) { ret void } -define <5 x i8> @load_v5i8(ptr addrspace(8) inreg %buf) { +define <5 x i8> @load_v5i8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <5 x i8> @load_v5i8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_OFF_0_FROM_LOADABLE:%.*]] = bitcast i32 [[RET_OFF_0]] to <4 x i8> -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <4 x i8> [[RET_OFF_0_FROM_LOADABLE]], <4 x i8> poison, <5 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <5 x i8> poison, <5 x i8> [[RET_EXT_0]], <5 x i32> -; CHECK-NEXT: [[RET_OFF_4:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) align 4 [[BUF]], i32 4, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = insertelement <5 x i8> [[RET_PARTS_0]], i8 [[RET_OFF_4]], i64 4 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <5 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v5i8(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <5 x i8> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1292,14 +1170,10 @@ define <5 x i8> @load_v5i8(ptr addrspace(8) inreg %buf) { ret <5 x i8> %ret } -define void @store_v5i8(<5 x i8> %data, ptr addrspace(8) inreg %buf) { +define void @store_v5i8(<5 x i8> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v5i8( -; CHECK-SAME: <5 x i8> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <5 x i8> [[DATA]], <5 x i8> poison, <4 x i32> -; CHECK-NEXT: [[DATA_SLICE_0_STORABLE:%.*]] = bitcast <4 x i8> [[DATA_SLICE_0]] to i32 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[DATA_SLICE_0_STORABLE]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = extractelement <5 x i8> [[DATA]], i64 4 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[DATA_SLICE_4]], ptr addrspace(8) align 4 [[BUF]], i32 4, i32 0, i32 0) +; CHECK-SAME: <5 x i8> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v5i8(<5 x i8> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1307,15 +1181,10 @@ define void @store_v5i8(<5 x i8> %data, ptr addrspace(8) inreg %buf) { ret void } -define <6 x i8> @load_v6i8(ptr addrspace(8) inreg %buf) { +define <6 x i8> @load_v6i8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <6 x i8> @load_v6i8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <2 x i16> [[RET_OFF_0]], <2 x i16> poison, <3 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <3 x i16> poison, <3 x i16> [[RET_EXT_0]], <3 x i32> -; CHECK-NEXT: [[RET_OFF_4:%.*]] = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) align 4 [[BUF]], i32 4, i32 0, i32 0) -; CHECK-NEXT: [[RET_SLICE_2:%.*]] = insertelement <3 x i16> [[RET_PARTS_0]], i16 [[RET_OFF_4]], i64 2 -; CHECK-NEXT: [[RET:%.*]] = bitcast <3 x i16> [[RET_SLICE_2]] to <6 x i8> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <6 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v6i8(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <6 x i8> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1323,14 +1192,10 @@ define <6 x i8> @load_v6i8(ptr addrspace(8) inreg %buf) { ret <6 x i8> %ret } -define void @store_v6i8(<6 x i8> %data, ptr addrspace(8) inreg %buf) { +define void @store_v6i8(<6 x i8> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v6i8( -; CHECK-SAME: <6 x i8> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <6 x i8> [[DATA]] to <3 x i16> -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <3 x i16> [[DATA_LEGAL]], <3 x i16> poison, <2 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i16(<2 x i16> [[DATA_SLICE_0]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_2:%.*]] = extractelement <3 x i16> [[DATA_LEGAL]], i64 2 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[DATA_SLICE_2]], ptr addrspace(8) align 4 [[BUF]], i32 4, i32 0, i32 0) +; CHECK-SAME: <6 x i8> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v6i8(<6 x i8> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1338,19 +1203,10 @@ define void @store_v6i8(<6 x i8> %data, ptr addrspace(8) inreg %buf) { ret void } -define <7 x i8> @load_v7i8(ptr addrspace(8) inreg %buf) { +define <7 x i8> @load_v7i8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <7 x i8> @load_v7i8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_OFF_0_FROM_LOADABLE:%.*]] = bitcast i32 [[RET_OFF_0]] to <4 x i8> -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <4 x i8> [[RET_OFF_0_FROM_LOADABLE]], <4 x i8> poison, <7 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <7 x i8> poison, <7 x i8> [[RET_EXT_0]], <7 x i32> -; CHECK-NEXT: [[RET_OFF_4:%.*]] = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) align 4 [[BUF]], i32 4, i32 0, i32 0) -; CHECK-NEXT: [[RET_OFF_4_FROM_LOADABLE:%.*]] = bitcast i16 [[RET_OFF_4]] to <2 x i8> -; CHECK-NEXT: [[RET_EXT_4:%.*]] = shufflevector <2 x i8> [[RET_OFF_4_FROM_LOADABLE]], <2 x i8> poison, <7 x i32> -; CHECK-NEXT: [[RET_PARTS_4:%.*]] = shufflevector <7 x i8> [[RET_PARTS_0]], <7 x i8> [[RET_EXT_4]], <7 x i32> -; CHECK-NEXT: [[RET_OFF_6:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) align 2 [[BUF]], i32 6, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = insertelement <7 x i8> [[RET_PARTS_4]], i8 [[RET_OFF_6]], i64 6 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <7 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v7i8(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <7 x i8> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1358,17 +1214,10 @@ define <7 x i8> @load_v7i8(ptr addrspace(8) inreg %buf) { ret <7 x i8> %ret } -define void @store_v7i8(<7 x i8> %data, ptr addrspace(8) inreg %buf) { +define void @store_v7i8(<7 x i8> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v7i8( -; CHECK-SAME: <7 x i8> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <7 x i8> [[DATA]], <7 x i8> poison, <4 x i32> -; CHECK-NEXT: [[DATA_SLICE_0_STORABLE:%.*]] = bitcast <4 x i8> [[DATA_SLICE_0]] to i32 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[DATA_SLICE_0_STORABLE]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = shufflevector <7 x i8> [[DATA]], <7 x i8> poison, <2 x i32> -; CHECK-NEXT: [[DATA_SLICE_4_STORABLE:%.*]] = bitcast <2 x i8> [[DATA_SLICE_4]] to i16 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[DATA_SLICE_4_STORABLE]], ptr addrspace(8) align 4 [[BUF]], i32 4, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_6:%.*]] = extractelement <7 x i8> [[DATA]], i64 6 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[DATA_SLICE_6]], ptr addrspace(8) align 2 [[BUF]], i32 6, i32 0, i32 0) +; CHECK-SAME: <7 x i8> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v7i8(<7 x i8> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1376,11 +1225,10 @@ define void @store_v7i8(<7 x i8> %data, ptr addrspace(8) inreg %buf) { ret void } -define <8 x i8> @load_v8i8(ptr addrspace(8) inreg %buf) { +define <8 x i8> @load_v8i8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <8 x i8> @load_v8i8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = bitcast <2 x i32> [[RET_LOADABLE]] to <8 x i8> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <8 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v8i8(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <8 x i8> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1388,11 +1236,10 @@ define <8 x i8> @load_v8i8(ptr addrspace(8) inreg %buf) { ret <8 x i8> %ret } -define void @store_v8i8(<8 x i8> %data, ptr addrspace(8) inreg %buf) { +define void @store_v8i8(<8 x i8> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v8i8( -; CHECK-SAME: <8 x i8> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <8 x i8> [[DATA]] to <2 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> [[DATA_LEGAL]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: <8 x i8> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v8i8(<8 x i8> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1400,11 +1247,10 @@ define void @store_v8i8(<8 x i8> %data, ptr addrspace(8) inreg %buf) { ret void } -define <12 x i8> @load_v12i8(ptr addrspace(8) inreg %buf) { +define <12 x i8> @load_v12i8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <12 x i8> @load_v12i8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = bitcast <3 x i32> [[RET_LOADABLE]] to <12 x i8> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <12 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v12i8(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <12 x i8> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1412,11 +1258,10 @@ define <12 x i8> @load_v12i8(ptr addrspace(8) inreg %buf) { ret <12 x i8> %ret } -define void @store_v12i8(<12 x i8> %data, ptr addrspace(8) inreg %buf) { +define void @store_v12i8(<12 x i8> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v12i8( -; CHECK-SAME: <12 x i8> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <12 x i8> [[DATA]] to <3 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> [[DATA_LEGAL]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: <12 x i8> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v12i8(<12 x i8> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1424,11 +1269,10 @@ define void @store_v12i8(<12 x i8> %data, ptr addrspace(8) inreg %buf) { ret void } -define <16 x i8> @load_v16i8(ptr addrspace(8) inreg %buf) { +define <16 x i8> @load_v16i8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <16 x i8> @load_v16i8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = bitcast <4 x i32> [[RET_LOADABLE]] to <16 x i8> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <16 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v16i8(ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <16 x i8> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1436,11 +1280,10 @@ define <16 x i8> @load_v16i8(ptr addrspace(8) inreg %buf) { ret <16 x i8> %ret } -define void @store_v16i8(<16 x i8> %data, ptr addrspace(8) inreg %buf) { +define void @store_v16i8(<16 x i8> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v16i8( -; CHECK-SAME: <16 x i8> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <16 x i8> [[DATA]] to <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DATA_LEGAL]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: <16 x i8> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v16i8(<16 x i8> [[DATA]], ptr addrspace(8) align 16 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1448,16 +1291,10 @@ define void @store_v16i8(<16 x i8> %data, ptr addrspace(8) inreg %buf) { ret void } -define <32 x i8> @load_v32i8(ptr addrspace(8) inreg %buf) { +define <32 x i8> @load_v32i8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <32 x i8> @load_v32i8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <4 x i32> [[RET_OFF_0]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <8 x i32> poison, <8 x i32> [[RET_EXT_0]], <8 x i32> -; CHECK-NEXT: [[RET_OFF_16:%.*]] = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_4:%.*]] = shufflevector <4 x i32> [[RET_OFF_16]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[RET_PARTS_4:%.*]] = shufflevector <8 x i32> [[RET_PARTS_0]], <8 x i32> [[RET_EXT_4]], <8 x i32> -; CHECK-NEXT: [[RET:%.*]] = bitcast <8 x i32> [[RET_PARTS_4]] to <32 x i8> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <32 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v32i8(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <32 x i8> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1465,14 +1302,10 @@ define <32 x i8> @load_v32i8(ptr addrspace(8) inreg %buf) { ret <32 x i8> %ret } -define void @store_v32i8(<32 x i8> %data, ptr addrspace(8) inreg %buf) { +define void @store_v32i8(<32 x i8> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v32i8( -; CHECK-SAME: <32 x i8> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <32 x i8> [[DATA]] to <8 x i32> -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <8 x i32> [[DATA_LEGAL]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DATA_SLICE_0]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = shufflevector <8 x i32> [[DATA_LEGAL]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DATA_SLICE_4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) +; CHECK-SAME: <32 x i8> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v32i8(<32 x i8> [[DATA]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1482,13 +1315,10 @@ define void @store_v32i8(<32 x i8> %data, ptr addrspace(8) inreg %buf) { ;;; Arrays. Need to become vectors. -define [1 x i32] @load_a1i32(ptr addrspace(8) inreg %buf) { +define [1 x i32] @load_a1i32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define [1 x i32] @load_a1i32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_FROM_LOADABLE:%.*]] = bitcast i32 [[RET_LOADABLE]] to <1 x i32> -; CHECK-NEXT: [[RET_ELEM_0:%.*]] = extractelement <1 x i32> [[RET_FROM_LOADABLE]], i64 0 -; CHECK-NEXT: [[RET:%.*]] = insertvalue [1 x i32] poison, i32 [[RET_ELEM_0]], 0 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call [1 x i32] @llvm.amdgcn.raw.ptr.buffer.load.a1i32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret [1 x i32] [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1496,13 +1326,10 @@ define [1 x i32] @load_a1i32(ptr addrspace(8) inreg %buf) { ret [1 x i32] %ret } -define void @store_a1i32([1 x i32] %data, ptr addrspace(8) inreg %buf) { +define void @store_a1i32([1 x i32] %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_a1i32( -; CHECK-SAME: [1 x i32] [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_ELEM_0:%.*]] = extractvalue [1 x i32] [[DATA]], 0 -; CHECK-NEXT: [[DATA_AS_VEC_0:%.*]] = insertelement <1 x i32> poison, i32 [[DATA_ELEM_0]], i64 0 -; CHECK-NEXT: [[DATA_STORABLE:%.*]] = bitcast <1 x i32> [[DATA_AS_VEC_0]] to i32 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[DATA_STORABLE]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: [1 x i32] [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.a1i32([1 x i32] [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1510,14 +1337,10 @@ define void @store_a1i32([1 x i32] %data, ptr addrspace(8) inreg %buf) { ret void } -define [2 x i32] @load_a2i32(ptr addrspace(8) inreg %buf) { +define [2 x i32] @load_a2i32(ptr addrspace(8) %buf) { ; CHECK-LABEL: define [2 x i32] @load_a2i32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_ELEM_0:%.*]] = extractelement <2 x i32> [[RET_LOADABLE]], i64 0 -; CHECK-NEXT: [[RET_AS_ARRAY_0:%.*]] = insertvalue [2 x i32] poison, i32 [[RET_ELEM_0]], 0 -; CHECK-NEXT: [[RET_ELEM_1:%.*]] = extractelement <2 x i32> [[RET_LOADABLE]], i64 1 -; CHECK-NEXT: [[RET:%.*]] = insertvalue [2 x i32] [[RET_AS_ARRAY_0]], i32 [[RET_ELEM_1]], 1 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call [2 x i32] @llvm.amdgcn.raw.ptr.buffer.load.a2i32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret [2 x i32] [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1525,14 +1348,10 @@ define [2 x i32] @load_a2i32(ptr addrspace(8) inreg %buf) { ret [2 x i32] %ret } -define void @store_a2i32([2 x i32] %data, ptr addrspace(8) inreg %buf) { +define void @store_a2i32([2 x i32] %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_a2i32( -; CHECK-SAME: [2 x i32] [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_ELEM_0:%.*]] = extractvalue [2 x i32] [[DATA]], 0 -; CHECK-NEXT: [[DATA_AS_VEC_0:%.*]] = insertelement <2 x i32> poison, i32 [[DATA_ELEM_0]], i64 0 -; CHECK-NEXT: [[DATA_ELEM_1:%.*]] = extractvalue [2 x i32] [[DATA]], 1 -; CHECK-NEXT: [[DATA_AS_VEC_1:%.*]] = insertelement <2 x i32> [[DATA_AS_VEC_0]], i32 [[DATA_ELEM_1]], i64 1 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> [[DATA_AS_VEC_1]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: [2 x i32] [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.a2i32([2 x i32] [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1540,14 +1359,10 @@ define void @store_a2i32([2 x i32] %data, ptr addrspace(8) inreg %buf) { ret void } -define [2 x half] @load_a2f16(ptr addrspace(8) inreg %buf) { +define [2 x half] @load_a2f16(ptr addrspace(8) %buf) { ; CHECK-LABEL: define [2 x half] @load_a2f16( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call <2 x half> @llvm.amdgcn.raw.ptr.buffer.load.v2f16(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_ELEM_0:%.*]] = extractelement <2 x half> [[RET_LOADABLE]], i64 0 -; CHECK-NEXT: [[RET_AS_ARRAY_0:%.*]] = insertvalue [2 x half] poison, half [[RET_ELEM_0]], 0 -; CHECK-NEXT: [[RET_ELEM_1:%.*]] = extractelement <2 x half> [[RET_LOADABLE]], i64 1 -; CHECK-NEXT: [[RET:%.*]] = insertvalue [2 x half] [[RET_AS_ARRAY_0]], half [[RET_ELEM_1]], 1 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call [2 x half] @llvm.amdgcn.raw.ptr.buffer.load.a2f16(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret [2 x half] [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1555,14 +1370,10 @@ define [2 x half] @load_a2f16(ptr addrspace(8) inreg %buf) { ret [2 x half] %ret } -define void @store_a2f16([2 x half] %data, ptr addrspace(8) inreg %buf) { +define void @store_a2f16([2 x half] %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_a2f16( -; CHECK-SAME: [2 x half] [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_ELEM_0:%.*]] = extractvalue [2 x half] [[DATA]], 0 -; CHECK-NEXT: [[DATA_AS_VEC_0:%.*]] = insertelement <2 x half> poison, half [[DATA_ELEM_0]], i64 0 -; CHECK-NEXT: [[DATA_ELEM_1:%.*]] = extractvalue [2 x half] [[DATA]], 1 -; CHECK-NEXT: [[DATA_AS_VEC_1:%.*]] = insertelement <2 x half> [[DATA_AS_VEC_0]], half [[DATA_ELEM_1]], i64 1 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2f16(<2 x half> [[DATA_AS_VEC_1]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: [2 x half] [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.a2f16([2 x half] [[DATA]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1570,14 +1381,10 @@ define void @store_a2f16([2 x half] %data, ptr addrspace(8) inreg %buf) { ret void } -define [2 x ptr addrspace(1)] @load_a2p1(ptr addrspace(8) inreg %buf) { +define [2 x ptr addrspace(1)] @load_a2p1(ptr addrspace(8) %buf) { ; CHECK-LABEL: define [2 x ptr addrspace(1)] @load_a2p1( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call <2 x ptr addrspace(1)> @llvm.amdgcn.raw.ptr.buffer.load.v2p1(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_ELEM_0:%.*]] = extractelement <2 x ptr addrspace(1)> [[RET_LOADABLE]], i64 0 -; CHECK-NEXT: [[RET_AS_ARRAY_0:%.*]] = insertvalue [2 x ptr addrspace(1)] poison, ptr addrspace(1) [[RET_ELEM_0]], 0 -; CHECK-NEXT: [[RET_ELEM_1:%.*]] = extractelement <2 x ptr addrspace(1)> [[RET_LOADABLE]], i64 1 -; CHECK-NEXT: [[RET:%.*]] = insertvalue [2 x ptr addrspace(1)] [[RET_AS_ARRAY_0]], ptr addrspace(1) [[RET_ELEM_1]], 1 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call [2 x ptr addrspace(1)] @llvm.amdgcn.raw.ptr.buffer.load.a2p1(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret [2 x ptr addrspace(1)] [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1585,14 +1392,10 @@ define [2 x ptr addrspace(1)] @load_a2p1(ptr addrspace(8) inreg %buf) { ret [2 x ptr addrspace(1)] %ret } -define void @store_a2p1([2 x ptr addrspace(1)] %data, ptr addrspace(8) inreg %buf) { +define void @store_a2p1([2 x ptr addrspace(1)] %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_a2p1( -; CHECK-SAME: [2 x ptr addrspace(1)] [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_ELEM_0:%.*]] = extractvalue [2 x ptr addrspace(1)] [[DATA]], 0 -; CHECK-NEXT: [[DATA_AS_VEC_0:%.*]] = insertelement <2 x ptr addrspace(1)> poison, ptr addrspace(1) [[DATA_ELEM_0]], i64 0 -; CHECK-NEXT: [[DATA_ELEM_1:%.*]] = extractvalue [2 x ptr addrspace(1)] [[DATA]], 1 -; CHECK-NEXT: [[DATA_AS_VEC_1:%.*]] = insertelement <2 x ptr addrspace(1)> [[DATA_AS_VEC_0]], ptr addrspace(1) [[DATA_ELEM_1]], i64 1 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2p1(<2 x ptr addrspace(1)> [[DATA_AS_VEC_1]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: [2 x ptr addrspace(1)] [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.a2p1([2 x ptr addrspace(1)] [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1602,16 +1405,10 @@ define void @store_a2p1([2 x ptr addrspace(1)] %data, ptr addrspace(8) inreg %bu ;;; Scalars of atypical width. Need to be cast to vectors and split. -define i40 @load_i40(ptr addrspace(8) inreg %buf) { +define i40 @load_i40(ptr addrspace(8) %buf) { ; CHECK-LABEL: define i40 @load_i40( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_OFF_0_FROM_LOADABLE:%.*]] = bitcast i32 [[RET_OFF_0]] to <4 x i8> -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <4 x i8> [[RET_OFF_0_FROM_LOADABLE]], <4 x i8> poison, <5 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <5 x i8> poison, <5 x i8> [[RET_EXT_0]], <5 x i32> -; CHECK-NEXT: [[RET_OFF_4:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) align 4 [[BUF]], i32 4, i32 0, i32 0) -; CHECK-NEXT: [[RET_SLICE_4:%.*]] = insertelement <5 x i8> [[RET_PARTS_0]], i8 [[RET_OFF_4]], i64 4 -; CHECK-NEXT: [[RET:%.*]] = bitcast <5 x i8> [[RET_SLICE_4]] to i40 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call i40 @llvm.amdgcn.raw.ptr.buffer.load.i40(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret i40 [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1619,15 +1416,10 @@ define i40 @load_i40(ptr addrspace(8) inreg %buf) { ret i40 %ret } -define void @store_i40(i40 %data, ptr addrspace(8) inreg %buf) { +define void @store_i40(i40 %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_i40( -; CHECK-SAME: i40 [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast i40 [[DATA]] to <5 x i8> -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <5 x i8> [[DATA_LEGAL]], <5 x i8> poison, <4 x i32> -; CHECK-NEXT: [[DATA_SLICE_0_STORABLE:%.*]] = bitcast <4 x i8> [[DATA_SLICE_0]] to i32 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[DATA_SLICE_0_STORABLE]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = extractelement <5 x i8> [[DATA_LEGAL]], i64 4 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[DATA_SLICE_4]], ptr addrspace(8) align 4 [[BUF]], i32 4, i32 0, i32 0) +; CHECK-SAME: i40 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i40(i40 [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1635,11 +1427,10 @@ define void @store_i40(i40 %data, ptr addrspace(8) inreg %buf) { ret void } -define i96 @load_i96(ptr addrspace(8) inreg %buf) { +define i96 @load_i96(ptr addrspace(8) %buf) { ; CHECK-LABEL: define i96 @load_i96( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call <3 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v3i32(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = bitcast <3 x i32> [[RET_LOADABLE]] to i96 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call i96 @llvm.amdgcn.raw.ptr.buffer.load.i96(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret i96 [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1647,11 +1438,10 @@ define i96 @load_i96(ptr addrspace(8) inreg %buf) { ret i96 %ret } -define void @store_i96(i96 %data, ptr addrspace(8) inreg %buf) { +define void @store_i96(i96 %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_i96( -; CHECK-SAME: i96 [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast i96 [[DATA]] to <3 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v3i32(<3 x i32> [[DATA_LEGAL]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: i96 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i96(i96 [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1659,15 +1449,10 @@ define void @store_i96(i96 %data, ptr addrspace(8) inreg %buf) { ret void } -define i160 @load_i160(ptr addrspace(8) inreg %buf) { +define i160 @load_i160(ptr addrspace(8) %buf) { ; CHECK-LABEL: define i160 @load_i160( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <4 x i32> [[RET_OFF_0]], <4 x i32> poison, <5 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <5 x i32> poison, <5 x i32> [[RET_EXT_0]], <5 x i32> -; CHECK-NEXT: [[RET_OFF_16:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) align 8 [[BUF]], i32 16, i32 0, i32 0) -; CHECK-NEXT: [[RET_SLICE_4:%.*]] = insertelement <5 x i32> [[RET_PARTS_0]], i32 [[RET_OFF_16]], i64 4 -; CHECK-NEXT: [[RET:%.*]] = bitcast <5 x i32> [[RET_SLICE_4]] to i160 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call i160 @llvm.amdgcn.raw.ptr.buffer.load.i160(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret i160 [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1675,14 +1460,10 @@ define i160 @load_i160(ptr addrspace(8) inreg %buf) { ret i160 %ret } -define void @store_i160(i160 %data, ptr addrspace(8) inreg %buf) { +define void @store_i160(i160 %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_i160( -; CHECK-SAME: i160 [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast i160 [[DATA]] to <5 x i32> -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <5 x i32> [[DATA_LEGAL]], <5 x i32> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DATA_SLICE_0]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = extractelement <5 x i32> [[DATA_LEGAL]], i64 4 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[DATA_SLICE_4]], ptr addrspace(8) align 8 [[BUF]], i32 16, i32 0, i32 0) +; CHECK-SAME: i160 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i160(i160 [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1690,16 +1471,10 @@ define void @store_i160(i160 %data, ptr addrspace(8) inreg %buf) { ret void } -define i256 @load_i256(ptr addrspace(8) inreg %buf) { +define i256 @load_i256(ptr addrspace(8) %buf) { ; CHECK-LABEL: define i256 @load_i256( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <4 x i32> [[RET_OFF_0]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <8 x i32> poison, <8 x i32> [[RET_EXT_0]], <8 x i32> -; CHECK-NEXT: [[RET_OFF_16:%.*]] = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) align 8 [[BUF]], i32 16, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_4:%.*]] = shufflevector <4 x i32> [[RET_OFF_16]], <4 x i32> poison, <8 x i32> -; CHECK-NEXT: [[RET_PARTS_4:%.*]] = shufflevector <8 x i32> [[RET_PARTS_0]], <8 x i32> [[RET_EXT_4]], <8 x i32> -; CHECK-NEXT: [[RET:%.*]] = bitcast <8 x i32> [[RET_PARTS_4]] to i256 +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call i256 @llvm.amdgcn.raw.ptr.buffer.load.i256(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret i256 [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1707,14 +1482,10 @@ define i256 @load_i256(ptr addrspace(8) inreg %buf) { ret i256 %ret } -define void @store_i256(i256 %data, ptr addrspace(8) inreg %buf) { +define void @store_i256(i256 %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_i256( -; CHECK-SAME: i256 [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast i256 [[DATA]] to <8 x i32> -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <8 x i32> [[DATA_LEGAL]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DATA_SLICE_0]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = shufflevector <8 x i32> [[DATA_LEGAL]], <8 x i32> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DATA_SLICE_4]], ptr addrspace(8) align 8 [[BUF]], i32 16, i32 0, i32 0) +; CHECK-SAME: i256 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i256(i256 [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1724,11 +1495,10 @@ define void @store_i256(i256 %data, ptr addrspace(8) inreg %buf) { ;;; Non-byte-sized scalars. Require zero-extension. -define i7 @load_i7(ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define i7 @load_i7( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = trunc i8 [[RET_LOADABLE]] to i7 +define i7 @load_i4(ptr addrspace(8) %buf) { +; CHECK-LABEL: define i7 @load_i4( +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call i7 @llvm.amdgcn.raw.ptr.buffer.load.i7(ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret i7 [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1736,49 +1506,23 @@ define i7 @load_i7(ptr addrspace(8) inreg %buf) { ret i7 %ret } -define void @store_i7(i7 %data, ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define void @store_i7( -; CHECK-SAME: i7 [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_ZEXT:%.*]] = zext i7 [[DATA]] to i8 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[DATA_ZEXT]], ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: ret void -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store i7 %data, ptr addrspace(7) %p - ret void -} - -define i4 @load_i4(ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define i4 @load_i4( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = trunc i8 [[RET_LOADABLE]] to i4 -; CHECK-NEXT: ret i4 [[RET]] -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load i4, ptr addrspace(7) %p - ret i4 %ret -} - -define void @store_i4(i4 %data, ptr addrspace(8) inreg %buf) { +define void @store_i4(i7 %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_i4( -; CHECK-SAME: i4 [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_ZEXT:%.*]] = zext i4 [[DATA]] to i8 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[DATA_ZEXT]], ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: i7 [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i7(i7 [[DATA]], ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store i4 %data, ptr addrspace(7) %p + store i7 %data, ptr addrspace(7) %p ret void } ;;; Byte-sized vectors of i4. Require casts. -define <2 x i4> @load_v2i4(ptr addrspace(8) inreg %buf) { +define <2 x i4> @load_v2i4(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <2 x i4> @load_v2i4( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = bitcast i8 [[RET_LOADABLE]] to <2 x i4> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <2 x i4> @llvm.amdgcn.raw.ptr.buffer.load.v2i4(ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <2 x i4> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1786,11 +1530,10 @@ define <2 x i4> @load_v2i4(ptr addrspace(8) inreg %buf) { ret <2 x i4> %ret } -define void @store_v2i4(<2 x i4> %data, ptr addrspace(8) inreg %buf) { +define void @store_v2i4(<2 x i4> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v2i4( -; CHECK-SAME: <2 x i4> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <2 x i4> [[DATA]] to i8 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[DATA_LEGAL]], ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: <2 x i4> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i4(<2 x i4> [[DATA]], ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1798,11 +1541,10 @@ define void @store_v2i4(<2 x i4> %data, ptr addrspace(8) inreg %buf) { ret void } -define <4 x i4> @load_v4i4(ptr addrspace(8) inreg %buf) { +define <4 x i4> @load_v4i4(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <4 x i4> @load_v4i4( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = bitcast i16 [[RET_LOADABLE]] to <4 x i4> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <4 x i4> @llvm.amdgcn.raw.ptr.buffer.load.v4i4(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <4 x i4> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1810,11 +1552,10 @@ define <4 x i4> @load_v4i4(ptr addrspace(8) inreg %buf) { ret <4 x i4> %ret } -define void @store_v4i4(<4 x i4> %data, ptr addrspace(8) inreg %buf) { +define void @store_v4i4(<4 x i4> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v4i4( -; CHECK-SAME: <4 x i4> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <4 x i4> [[DATA]] to i16 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[DATA_LEGAL]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: <4 x i4> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i4(<4 x i4> [[DATA]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1822,11 +1563,10 @@ define void @store_v4i4(<4 x i4> %data, ptr addrspace(8) inreg %buf) { ret void } -define <8 x i4> @load_v8i4(ptr addrspace(8) inreg %buf) { +define <8 x i4> @load_v8i4(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <8 x i4> @load_v8i4( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = bitcast i32 [[RET_LOADABLE]] to <8 x i4> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <8 x i4> @llvm.amdgcn.raw.ptr.buffer.load.v8i4(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <8 x i4> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1834,11 +1574,10 @@ define <8 x i4> @load_v8i4(ptr addrspace(8) inreg %buf) { ret <8 x i4> %ret } -define void @store_v8i4(<8 x i4> %data, ptr addrspace(8) inreg %buf) { +define void @store_v8i4(<8 x i4> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v8i4( -; CHECK-SAME: <8 x i4> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <8 x i4> [[DATA]] to i32 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[DATA_LEGAL]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: <8 x i4> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v8i4(<8 x i4> [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1848,12 +1587,10 @@ define void @store_v8i4(<8 x i4> %data, ptr addrspace(8) inreg %buf) { ;;; Vectors of non-byte-sized integers. -define <2 x i6> @load_v2i6(ptr addrspace(8) inreg %buf) { +define <2 x i6> @load_v2i6(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <2 x i6> @load_v2i6( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_TRUNC:%.*]] = trunc i16 [[RET_LOADABLE]] to i12 -; CHECK-NEXT: [[RET:%.*]] = bitcast i12 [[RET_TRUNC]] to <2 x i6> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <2 x i6> @llvm.amdgcn.raw.ptr.buffer.load.v2i6(ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret <2 x i6> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1861,12 +1598,10 @@ define <2 x i6> @load_v2i6(ptr addrspace(8) inreg %buf) { ret <2 x i6> %ret } -define void @store_v2i6(<2 x i6> %data, ptr addrspace(8) inreg %buf) { +define void @store_v2i6(<2 x i6> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v2i6( -; CHECK-SAME: <2 x i6> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_AS_SCALAR:%.*]] = bitcast <2 x i6> [[DATA]] to i12 -; CHECK-NEXT: [[DATA_ZEXT:%.*]] = zext i12 [[DATA_AS_SCALAR]] to i16 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[DATA_ZEXT]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) +; CHECK-SAME: <2 x i6> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i6(<2 x i6> [[DATA]], ptr addrspace(8) align 2 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1875,16 +1610,10 @@ define void @store_v2i6(<2 x i6> %data, ptr addrspace(8) inreg %buf) { } ;; Blocks of fp6 elements -define <6 x i32> @load_v32i6(ptr addrspace(8) inreg %buf) { +define <6 x i32> @load_v32i6(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <6 x i32> @load_v32i6( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <4 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v4i32(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <4 x i32> [[RET_OFF_0]], <4 x i32> poison, <6 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <6 x i32> poison, <6 x i32> [[RET_EXT_0]], <6 x i32> -; CHECK-NEXT: [[RET_OFF_16:%.*]] = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) -; CHECK-NEXT: [[RET_EXT_4:%.*]] = shufflevector <2 x i32> [[RET_OFF_16]], <2 x i32> poison, <6 x i32> -; CHECK-NEXT: [[RET_PARTS_4:%.*]] = shufflevector <6 x i32> [[RET_PARTS_0]], <6 x i32> [[RET_EXT_4]], <6 x i32> -; CHECK-NEXT: [[RET:%.*]] = bitcast <6 x i32> [[RET_PARTS_4]] to <32 x i6> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <32 x i6> @llvm.amdgcn.raw.ptr.buffer.load.v32i6(ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: [[RET_CAST:%.*]] = bitcast <32 x i6> [[RET]] to <6 x i32> ; CHECK-NEXT: ret <6 x i32> [[RET_CAST]] ; @@ -1894,15 +1623,11 @@ define <6 x i32> @load_v32i6(ptr addrspace(8) inreg %buf) { ret <6 x i32> %ret.cast } -define void @store_v32i6(<6 x i32> %data.abi, ptr addrspace(8) inreg %buf) { +define void @store_v32i6(<6 x i32> %data.abi, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @store_v32i6( -; CHECK-SAME: <6 x i32> [[DATA_ABI:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-SAME: <6 x i32> [[DATA_ABI:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { ; CHECK-NEXT: [[DATA:%.*]] = bitcast <6 x i32> [[DATA_ABI]] to <32 x i6> -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <32 x i6> [[DATA]] to <6 x i32> -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <6 x i32> [[DATA_LEGAL]], <6 x i32> poison, <4 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[DATA_SLICE_0]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_SLICE_4:%.*]] = shufflevector <6 x i32> [[DATA_LEGAL]], <6 x i32> poison, <2 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> [[DATA_SLICE_4]], ptr addrspace(8) align 16 [[BUF]], i32 16, i32 0, i32 0) +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v32i6(<32 x i6> [[DATA]], ptr addrspace(8) align 32 [[BUF]], i32 0, i32 0, i32 0) ; CHECK-NEXT: ret void ; %data = bitcast <6 x i32> %data.abi to <32 x i6> @@ -1913,11 +1638,10 @@ define void @store_v32i6(<6 x i32> %data.abi, ptr addrspace(8) inreg %buf) { ;;; Modifiers -define <4 x i8> @volatile_load_v4i8(ptr addrspace(8) inreg %buf) { +define <4 x i8> @volatile_load_v4i8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <4 x i8> @volatile_load_v4i8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_LOADABLE:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 -2147483648) -; CHECK-NEXT: [[RET:%.*]] = bitcast i32 [[RET_LOADABLE]] to <4 x i8> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <4 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v4i8(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 -2147483648) ; CHECK-NEXT: ret <4 x i8> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1925,11 +1649,10 @@ define <4 x i8> @volatile_load_v4i8(ptr addrspace(8) inreg %buf) { ret <4 x i8> %ret } -define void @volatile_store_v4i8(<4 x i8> %data, ptr addrspace(8) inreg %buf) { +define void @volatile_store_v4i8(<4 x i8> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @volatile_store_v4i8( -; CHECK-SAME: <4 x i8> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <4 x i8> [[DATA]] to i32 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[DATA_LEGAL]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 -2147483648) +; CHECK-SAME: <4 x i8> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i8(<4 x i8> [[DATA]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 -2147483648) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1937,15 +1660,10 @@ define void @volatile_store_v4i8(<4 x i8> %data, ptr addrspace(8) inreg %buf) { ret void } -define <6 x i8> @volatile_load_v6i8(ptr addrspace(8) inreg %buf) { +define <6 x i8> @volatile_load_v6i8(ptr addrspace(8) %buf) { ; CHECK-LABEL: define <6 x i8> @volatile_load_v6i8( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_OFF_0:%.*]] = call <2 x i16> @llvm.amdgcn.raw.ptr.buffer.load.v2i16(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 -2147483648) -; CHECK-NEXT: [[RET_EXT_0:%.*]] = shufflevector <2 x i16> [[RET_OFF_0]], <2 x i16> poison, <3 x i32> -; CHECK-NEXT: [[RET_PARTS_0:%.*]] = shufflevector <3 x i16> poison, <3 x i16> [[RET_EXT_0]], <3 x i32> -; CHECK-NEXT: [[RET_OFF_4:%.*]] = call i16 @llvm.amdgcn.raw.ptr.buffer.load.i16(ptr addrspace(8) align 4 [[BUF]], i32 4, i32 0, i32 -2147483648) -; CHECK-NEXT: [[RET_SLICE_2:%.*]] = insertelement <3 x i16> [[RET_PARTS_0]], i16 [[RET_OFF_4]], i64 2 -; CHECK-NEXT: [[RET:%.*]] = bitcast <3 x i16> [[RET_SLICE_2]] to <6 x i8> +; CHECK-SAME: ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: [[RET:%.*]] = call <6 x i8> @llvm.amdgcn.raw.ptr.buffer.load.v6i8(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 -2147483648) ; CHECK-NEXT: ret <6 x i8> [[RET]] ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) @@ -1953,257 +1671,13 @@ define <6 x i8> @volatile_load_v6i8(ptr addrspace(8) inreg %buf) { ret <6 x i8> %ret } -define void @volatile_store_v6i8(<6 x i8> %data, ptr addrspace(8) inreg %buf) { +define void @volatile_store_v6i8(<6 x i8> %data, ptr addrspace(8) %buf) { ; CHECK-LABEL: define void @volatile_store_v6i8( -; CHECK-SAME: <6 x i8> [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_LEGAL:%.*]] = bitcast <6 x i8> [[DATA]] to <3 x i16> -; CHECK-NEXT: [[DATA_SLICE_0:%.*]] = shufflevector <3 x i16> [[DATA_LEGAL]], <3 x i16> poison, <2 x i32> -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i16(<2 x i16> [[DATA_SLICE_0]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 -2147483648) -; CHECK-NEXT: [[DATA_SLICE_2:%.*]] = extractelement <3 x i16> [[DATA_LEGAL]], i64 2 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i16(i16 [[DATA_SLICE_2]], ptr addrspace(8) align 4 [[BUF]], i32 4, i32 0, i32 -2147483648) +; CHECK-SAME: <6 x i8> [[DATA:%.*]], ptr addrspace(8) [[BUF:%.*]]) #[[ATTR0]] { +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v6i8(<6 x i8> [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 -2147483648) ; CHECK-NEXT: ret void ; %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) store volatile <6 x i8> %data, ptr addrspace(7) %p ret void } - -define [2 x [2 x i32]] @load_a2a2i32(ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define [2 x [2 x i32]] @load_a2a2i32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET0_OFF_0:%.*]] = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET0_ELEM_0:%.*]] = extractelement <2 x i32> [[RET0_OFF_0]], i64 0 -; CHECK-NEXT: [[RET0_AS_ARRAY_0:%.*]] = insertvalue [2 x i32] poison, i32 [[RET0_ELEM_0]], 0 -; CHECK-NEXT: [[RET0_ELEM_1:%.*]] = extractelement <2 x i32> [[RET0_OFF_0]], i64 1 -; CHECK-NEXT: [[RET0_AS_ARRAY_1:%.*]] = insertvalue [2 x i32] [[RET0_AS_ARRAY_0]], i32 [[RET0_ELEM_1]], 1 -; CHECK-NEXT: [[RET0:%.*]] = insertvalue [2 x [2 x i32]] poison, [2 x i32] [[RET0_AS_ARRAY_1]], 0 -; CHECK-NEXT: [[RET1_OFF_8:%.*]] = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) align 4 [[BUF]], i32 8, i32 0, i32 0) -; CHECK-NEXT: [[RET1_ELEM_0:%.*]] = extractelement <2 x i32> [[RET1_OFF_8]], i64 0 -; CHECK-NEXT: [[RET1_AS_ARRAY_0:%.*]] = insertvalue [2 x i32] poison, i32 [[RET1_ELEM_0]], 0 -; CHECK-NEXT: [[RET1_ELEM_1:%.*]] = extractelement <2 x i32> [[RET1_OFF_8]], i64 1 -; CHECK-NEXT: [[RET1_AS_ARRAY_1:%.*]] = insertvalue [2 x i32] [[RET1_AS_ARRAY_0]], i32 [[RET1_ELEM_1]], 1 -; CHECK-NEXT: [[RET:%.*]] = insertvalue [2 x [2 x i32]] [[RET0]], [2 x i32] [[RET1_AS_ARRAY_1]], 1 -; CHECK-NEXT: ret [2 x [2 x i32]] [[RET]] -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load [2 x [2 x i32]], ptr addrspace(7) %p - ret [2 x [2 x i32]] %ret -} - -define void @store_a2a2i32([2 x [2 x i32]] %data, ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define void @store_a2a2i32( -; CHECK-SAME: [2 x [2 x i32]] [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA0:%.*]] = extractvalue [2 x [2 x i32]] [[DATA]], 0 -; CHECK-NEXT: [[DATA0_ELEM_0:%.*]] = extractvalue [2 x i32] [[DATA0]], 0 -; CHECK-NEXT: [[DATA0_AS_VEC_0:%.*]] = insertelement <2 x i32> poison, i32 [[DATA0_ELEM_0]], i64 0 -; CHECK-NEXT: [[DATA0_ELEM_1:%.*]] = extractvalue [2 x i32] [[DATA0]], 1 -; CHECK-NEXT: [[DATA0_AS_VEC_1:%.*]] = insertelement <2 x i32> [[DATA0_AS_VEC_0]], i32 [[DATA0_ELEM_1]], i64 1 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> [[DATA0_AS_VEC_1]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA1:%.*]] = extractvalue [2 x [2 x i32]] [[DATA]], 1 -; CHECK-NEXT: [[DATA1_ELEM_0:%.*]] = extractvalue [2 x i32] [[DATA1]], 0 -; CHECK-NEXT: [[DATA1_AS_VEC_0:%.*]] = insertelement <2 x i32> poison, i32 [[DATA1_ELEM_0]], i64 0 -; CHECK-NEXT: [[DATA1_ELEM_1:%.*]] = extractvalue [2 x i32] [[DATA1]], 1 -; CHECK-NEXT: [[DATA1_AS_VEC_1:%.*]] = insertelement <2 x i32> [[DATA1_AS_VEC_0]], i32 [[DATA1_ELEM_1]], i64 1 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> [[DATA1_AS_VEC_1]], ptr addrspace(8) align 4 [[BUF]], i32 8, i32 0, i32 0) -; CHECK-NEXT: ret void -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store [2 x [2 x i32]] %data, ptr addrspace(7) %p - ret void -} - -define [2 x <2 x i32>] @load_a2v2i32(ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define [2 x <2 x i32>] @load_a2v2i32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET0_OFF_0:%.*]] = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET0:%.*]] = insertvalue [2 x <2 x i32>] poison, <2 x i32> [[RET0_OFF_0]], 0 -; CHECK-NEXT: [[RET1_OFF_8:%.*]] = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) align 8 [[BUF]], i32 8, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = insertvalue [2 x <2 x i32>] [[RET0]], <2 x i32> [[RET1_OFF_8]], 1 -; CHECK-NEXT: ret [2 x <2 x i32>] [[RET]] -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load [2 x <2 x i32>], ptr addrspace(7) %p - ret [2 x <2 x i32>] %ret -} - -define void @store_a2v2i32([2 x <2 x i32>] %data, ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define void @store_a2v2i32( -; CHECK-SAME: [2 x <2 x i32>] [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA0:%.*]] = extractvalue [2 x <2 x i32>] [[DATA]], 0 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> [[DATA0]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA1:%.*]] = extractvalue [2 x <2 x i32>] [[DATA]], 1 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> [[DATA1]], ptr addrspace(8) align 8 [[BUF]], i32 8, i32 0, i32 0) -; CHECK-NEXT: ret void -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store [2 x <2 x i32>] %data, ptr addrspace(7) %p - ret void -} - -define { i32 } @load_sl_i32s(ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define { i32 } @load_sl_i32s( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_0_OFF_0:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = insertvalue { i32 } poison, i32 [[RET_0_OFF_0]], 0 -; CHECK-NEXT: ret { i32 } [[RET]] -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load { i32 }, ptr addrspace(7) %p - ret { i32 } %ret -} - -define void @store_sl_i32s({ i32 } %data, ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define void @store_sl_i32s( -; CHECK-SAME: { i32 } [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_0:%.*]] = extractvalue { i32 } [[DATA]], 0 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[DATA_0]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: ret void -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store { i32 } %data, ptr addrspace(7) %p - ret void -} - -define { { float } } @load_sl_sl_f32ss(ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define { { float } } @load_sl_sl_f32ss( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_0_0_OFF_0:%.*]] = call float @llvm.amdgcn.raw.ptr.buffer.load.f32(ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = insertvalue { { float } } poison, float [[RET_0_0_OFF_0]], 0, 0 -; CHECK-NEXT: ret { { float } } [[RET]] -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load { { float } }, ptr addrspace(7) %p - ret { { float } } %ret -} - -define void @store_sl_sl_f32ss({ { float } } %data, ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define void @store_sl_sl_f32ss( -; CHECK-SAME: { { float } } [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_0_0:%.*]] = extractvalue { { float } } [[DATA]], 0, 0 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.f32(float [[DATA_0_0]], ptr addrspace(8) align 4 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: ret void -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store { { float } } %data, ptr addrspace(7) %p - ret void -} - -define { <2 x i32> } @load_sl_v2i32s(ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define { <2 x i32> } @load_sl_v2i32s( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_0_OFF_0:%.*]] = call <2 x i32> @llvm.amdgcn.raw.ptr.buffer.load.v2i32(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = insertvalue { <2 x i32> } poison, <2 x i32> [[RET_0_OFF_0]], 0 -; CHECK-NEXT: ret { <2 x i32> } [[RET]] -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load { <2 x i32> }, ptr addrspace(7) %p - ret { <2 x i32> } %ret -} - -define void @store_sl_v2i32s({ <2 x i32> } %data, ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define void @store_sl_v2i32s( -; CHECK-SAME: { <2 x i32> } [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_0:%.*]] = extractvalue { <2 x i32> } [[DATA]], 0 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v2i32(<2 x i32> [[DATA_0]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: ret void -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store { <2 x i32> } %data, ptr addrspace(7) %p - ret void -} - -define { i64, i32 } @load_sl_i64i32s(ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define { i64, i32 } @load_sl_i64i32s( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET_0_OFF_0:%.*]] = call i64 @llvm.amdgcn.raw.ptr.buffer.load.i64(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET_0:%.*]] = insertvalue { i64, i32 } poison, i64 [[RET_0_OFF_0]], 0 -; CHECK-NEXT: [[RET_1_OFF_8:%.*]] = call i32 @llvm.amdgcn.raw.ptr.buffer.load.i32(ptr addrspace(8) align 8 [[BUF]], i32 8, i32 0, i32 0) -; CHECK-NEXT: [[RET:%.*]] = insertvalue { i64, i32 } [[RET_0]], i32 [[RET_1_OFF_8]], 1 -; CHECK-NEXT: ret { i64, i32 } [[RET]] -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load { i64, i32 }, ptr addrspace(7) %p - ret { i64, i32 } %ret -} - -define void @store_sl_i64i32s({ i64, i32 } %data, ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define void @store_sl_i64i32s( -; CHECK-SAME: { i64, i32 } [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA_0:%.*]] = extractvalue { i64, i32 } [[DATA]], 0 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i64(i64 [[DATA_0]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA_1:%.*]] = extractvalue { i64, i32 } [[DATA]], 1 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[DATA_1]], ptr addrspace(8) align 8 [[BUF]], i32 8, i32 0, i32 0) -; CHECK-NEXT: ret void -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store { i64, i32 } %data, ptr addrspace(7) %p - ret void -} - -define [4 x i7] @load_a4i7(ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define [4 x i7] @load_a4i7( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET0_OFF_0:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[RET0_TRUNC:%.*]] = trunc i8 [[RET0_OFF_0]] to i7 -; CHECK-NEXT: [[RET0:%.*]] = insertvalue [4 x i7] poison, i7 [[RET0_TRUNC]], 0 -; CHECK-NEXT: [[RET1_OFF_1:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) align 1 [[BUF]], i32 1, i32 0, i32 0) -; CHECK-NEXT: [[RET1_TRUNC:%.*]] = trunc i8 [[RET1_OFF_1]] to i7 -; CHECK-NEXT: [[RET1:%.*]] = insertvalue [4 x i7] [[RET0]], i7 [[RET1_TRUNC]], 1 -; CHECK-NEXT: [[RET2_OFF_2:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) align 1 [[BUF]], i32 2, i32 0, i32 0) -; CHECK-NEXT: [[RET2_TRUNC:%.*]] = trunc i8 [[RET2_OFF_2]] to i7 -; CHECK-NEXT: [[RET2:%.*]] = insertvalue [4 x i7] [[RET1]], i7 [[RET2_TRUNC]], 2 -; CHECK-NEXT: [[RET3_OFF_3:%.*]] = call i8 @llvm.amdgcn.raw.ptr.buffer.load.i8(ptr addrspace(8) align 1 [[BUF]], i32 3, i32 0, i32 0) -; CHECK-NEXT: [[RET3_TRUNC:%.*]] = trunc i8 [[RET3_OFF_3]] to i7 -; CHECK-NEXT: [[RET:%.*]] = insertvalue [4 x i7] [[RET2]], i7 [[RET3_TRUNC]], 3 -; CHECK-NEXT: ret [4 x i7] [[RET]] -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load [4 x i7], ptr addrspace(7) %p - ret [4 x i7] %ret -} - -define void @store_a4i7([4 x i7] %data, ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define void @store_a4i7( -; CHECK-SAME: [4 x i7] [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[DATA0:%.*]] = extractvalue [4 x i7] [[DATA]], 0 -; CHECK-NEXT: [[DATA0_ZEXT:%.*]] = zext i7 [[DATA0]] to i8 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[DATA0_ZEXT]], ptr addrspace(8) align 1 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: [[DATA1:%.*]] = extractvalue [4 x i7] [[DATA]], 1 -; CHECK-NEXT: [[DATA1_ZEXT:%.*]] = zext i7 [[DATA1]] to i8 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[DATA1_ZEXT]], ptr addrspace(8) align 1 [[BUF]], i32 1, i32 0, i32 0) -; CHECK-NEXT: [[DATA2:%.*]] = extractvalue [4 x i7] [[DATA]], 2 -; CHECK-NEXT: [[DATA2_ZEXT:%.*]] = zext i7 [[DATA2]] to i8 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[DATA2_ZEXT]], ptr addrspace(8) align 1 [[BUF]], i32 2, i32 0, i32 0) -; CHECK-NEXT: [[DATA3:%.*]] = extractvalue [4 x i7] [[DATA]], 3 -; CHECK-NEXT: [[DATA3_ZEXT:%.*]] = zext i7 [[DATA3]] to i8 -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i8(i8 [[DATA3_ZEXT]], ptr addrspace(8) align 1 [[BUF]], i32 3, i32 0, i32 0) -; CHECK-NEXT: ret void -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store [4 x i7] %data, ptr addrspace(7) %p - ret void -} - -;;; Scalable vector. This isn't semantically meaningful but shouldn't crash. - -define @load_nxv2i32(ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define @load_nxv2i32( -; CHECK-SAME: ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: [[RET:%.*]] = call @llvm.amdgcn.raw.ptr.buffer.load.nxv2i32(ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: ret [[RET]] -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - %ret = load , ptr addrspace(7) %p - ret %ret -} - -define void @store_nxv2i32( %data, ptr addrspace(8) inreg %buf) { -; CHECK-LABEL: define void @store_nxv2i32( -; CHECK-SAME: [[DATA:%.*]], ptr addrspace(8) inreg [[BUF:%.*]]) #[[ATTR0]] { -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.nxv2i32( [[DATA]], ptr addrspace(8) align 8 [[BUF]], i32 0, i32 0, i32 0) -; CHECK-NEXT: ret void -; - %p = addrspacecast ptr addrspace(8) %buf to ptr addrspace(7) - store %data, ptr addrspace(7) %p - ret void -} diff --git a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll index c3762e2cfff32..90fc3cf3d72ea 100644 --- a/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll +++ b/llvm/test/CodeGen/AMDGPU/lower-buffer-fat-pointers-unoptimized-debug-data.ll @@ -54,12 +54,7 @@ define float @debug_stash_pointer(ptr addrspace(8) %buf, i32 %idx, ptr addrspace ; CHECK-NEXT: [[TMP10:%.*]] = shl nuw i160 [[BUF_PTR_4_PTR_INT_RSRC]], 32, !dbg [[DBG33]] ; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_OFF:%.*]] = zext i32 [[BUF_PTR_4_PTR_OFF]] to i160, !dbg [[DBG33]] ; CHECK-NEXT: [[BUF_PTR_4_PTR_INT:%.*]] = or i160 [[TMP10]], [[BUF_PTR_4_PTR_INT_OFF]], !dbg [[DBG33]] -; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_LEGAL:%.*]] = bitcast i160 [[BUF_PTR_4_PTR_INT]] to <5 x i32>, !dbg [[DBG33]] -; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_SLICE_0:%.*]] = shufflevector <5 x i32> [[BUF_PTR_4_PTR_INT_LEGAL]], <5 x i32> poison, <4 x i32> , !dbg [[DBG33]] -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.v4i32(<4 x i32> [[BUF_PTR_4_PTR_INT_SLICE_0]], ptr addrspace(8) align 32 [[AUX_PTR_2_PTR_RSRC]], i32 [[AUX_PTR_2_PTR_OFF]], i32 0, i32 0), !dbg [[DBG33]] -; CHECK-NEXT: [[AUX_PTR_2_PTR_PART_4:%.*]] = add nuw i32 [[AUX_PTR_2_PTR_OFF]], 16, !dbg [[DBG33]] -; CHECK-NEXT: [[BUF_PTR_4_PTR_INT_SLICE_4:%.*]] = extractelement <5 x i32> [[BUF_PTR_4_PTR_INT_LEGAL]], i64 4, !dbg [[DBG33]] -; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i32(i32 [[BUF_PTR_4_PTR_INT_SLICE_4]], ptr addrspace(8) align 16 [[AUX_PTR_2_PTR_RSRC]], i32 [[AUX_PTR_2_PTR_PART_4]], i32 0, i32 0), !dbg [[DBG33]] +; CHECK-NEXT: call void @llvm.amdgcn.raw.ptr.buffer.store.i160(i160 [[BUF_PTR_4_PTR_INT]], ptr addrspace(8) align 32 [[AUX_PTR_2_PTR_RSRC]], i32 [[AUX_PTR_2_PTR_OFF]], i32 0, i32 0), !dbg [[DBG33]] ; CHECK-NEXT: ret float [[RET]], !dbg [[DBG34:![0-9]+]] ; %buf.ptr.var = alloca ptr addrspace(7), align 32, addrspace(5), !dbg !20