diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp index 81c7596530ee2..a794f3e9c5363 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizePointerCast.cpp @@ -168,6 +168,9 @@ class SPIRVLegalizePointerCast : public FunctionPass { assert(VecTy->getElementType() == ArrTy->getElementType() && "Element types of array and vector must be the same."); + const DataLayout &DL = B.GetInsertBlock()->getModule()->getDataLayout(); + uint64_t ElemSize = DL.getTypeAllocSize(ArrTy->getElementType()); + for (unsigned i = 0; i < VecTy->getNumElements(); ++i) { // Create a GEP to access the i-th element of the array. SmallVector<Type *, 2> Types = {DstArrayPtr->getType(), @@ -190,7 +193,8 @@ class SPIRVLegalizePointerCast : public FunctionPass { buildAssignType(B, VecTy->getElementType(), Element); Types = {Element->getType(), ElementPtr->getType()}; - Args = {Element, ElementPtr, B.getInt16(2), B.getInt8(Alignment.value())}; + Align NewAlign = commonAlignment(Alignment, i * ElemSize); + Args = {Element, ElementPtr, B.getInt16(2), B.getInt8(NewAlign.value())}; B.CreateIntrinsic(Intrinsic::spv_store, {Types}, {Args}); } } diff --git a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp index 30703ee40be06..40cb7dda53ba4 100644 --- a/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVLegalizerInfo.cpp @@ -114,6 +114,8 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) { v3s1, v3s8, v3s16, v3s32, v3s64, v4s1, v4s8, v4s16, v4s32, v4s64}; + auto allScalars = {s1, s8, s16, s32, s64}; + auto allScalarsAndVectors = { s1, s8, s16, s32, s64, s128, v2s1, v2s8, v2s16, v2s32, v2s64, v3s1, v3s8, v3s16, v3s32, v3s64, @@ -173,10 +175,45 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) { uint32_t MaxVectorSize = ST.isShader() ? 
4 : 16; for (auto Opc : getTypeFoldingSupportedOpcodes()) { - if (Opc != G_EXTRACT_VECTOR_ELT) - getActionDefinitionsBuilder(Opc).custom(); + switch (Opc) { + case G_EXTRACT_VECTOR_ELT: + case G_UREM: + case G_SREM: + case G_UDIV: + case G_SDIV: + case G_FREM: + break; + default: + getActionDefinitionsBuilder(Opc) + .customFor(allScalars) + .customFor(allowedVectorTypes) + .moreElementsToNextPow2(0) + .fewerElementsIf(vectorElementCountIsGreaterThan(0, MaxVectorSize), + LegalizeMutations::changeElementCountTo( + 0, ElementCount::getFixed(MaxVectorSize))) + .custom(); + break; + } } + getActionDefinitionsBuilder({G_UREM, G_SREM, G_SDIV, G_UDIV, G_FREM}) + .customFor(allScalars) + .customFor(allowedVectorTypes) + .scalarizeIf(numElementsNotPow2(0), 0) + .fewerElementsIf(vectorElementCountIsGreaterThan(0, MaxVectorSize), + LegalizeMutations::changeElementCountTo( + 0, ElementCount::getFixed(MaxVectorSize))) + .custom(); + + getActionDefinitionsBuilder({G_FMA, G_STRICT_FMA}) + .legalFor(allScalars) + .legalFor(allowedVectorTypes) + .moreElementsToNextPow2(0) + .fewerElementsIf(vectorElementCountIsGreaterThan(0, MaxVectorSize), + LegalizeMutations::changeElementCountTo( + 0, ElementCount::getFixed(MaxVectorSize))) + .alwaysLegal(); + getActionDefinitionsBuilder(G_INTRINSIC_W_SIDE_EFFECTS).custom(); getActionDefinitionsBuilder(G_SHUFFLE_VECTOR) @@ -194,6 +231,13 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) { 1, ElementCount::getFixed(MaxVectorSize))) .custom(); + getActionDefinitionsBuilder(G_INSERT_VECTOR_ELT) + .moreElementsToNextPow2(0) + .fewerElementsIf(vectorElementCountIsGreaterThan(0, MaxVectorSize), + LegalizeMutations::changeElementCountTo( + 0, ElementCount::getFixed(MaxVectorSize))) + .custom(); + // Illegal G_UNMERGE_VALUES instructions should be handled // during the combine phase. getActionDefinitionsBuilder(G_BUILD_VECTOR) @@ -217,14 +261,13 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) { .lowerIf(vectorElementCountIsGreaterThan(1, MaxVectorSize)) .custom(); + // If the result is still illegal, the combiner should be able to remove it. 
getActionDefinitionsBuilder(G_CONCAT_VECTORS) - .legalIf(vectorElementCountIsLessThanOrEqualTo(0, MaxVectorSize)) - .moreElementsToNextPow2(0) - .lowerIf(vectorElementCountIsGreaterThan(0, MaxVectorSize)) - .alwaysLegal(); + .legalForCartesianProduct(allowedVectorTypes, allowedVectorTypes) + .moreElementsToNextPow2(0); getActionDefinitionsBuilder(G_SPLAT_VECTOR) - .legalIf(vectorElementCountIsLessThanOrEqualTo(0, MaxVectorSize)) + .legalFor(allowedVectorTypes) .moreElementsToNextPow2(0) .fewerElementsIf(vectorElementCountIsGreaterThan(0, MaxVectorSize), LegalizeMutations::changeElementSizeTo(0, MaxVectorSize)) @@ -273,9 +316,6 @@ SPIRVLegalizerInfo::SPIRVLegalizerInfo(const SPIRVSubtarget &ST) { .legalFor(allIntScalarsAndVectors) .legalIf(extendedScalarsAndVectors); - getActionDefinitionsBuilder({G_FMA, G_STRICT_FMA}) - .legalFor(allFloatScalarsAndVectors); - getActionDefinitionsBuilder(G_STRICT_FLDEXP) .legalForCartesianProduct(allFloatScalarsAndVectors, allIntScalars); @@ -461,6 +501,23 @@ static bool legalizeExtractVectorElt(LegalizerHelper &Helper, MachineInstr &MI, return true; } +static bool legalizeInsertVectorElt(LegalizerHelper &Helper, MachineInstr &MI, + SPIRVGlobalRegistry *GR) { + MachineIRBuilder &MIRBuilder = Helper.MIRBuilder; + Register DstReg = MI.getOperand(0).getReg(); + Register SrcReg = MI.getOperand(1).getReg(); + Register ValReg = MI.getOperand(2).getReg(); + Register IdxReg = MI.getOperand(3).getReg(); + + MIRBuilder + .buildIntrinsic(Intrinsic::spv_insertelt, ArrayRef{DstReg}) + .addUse(SrcReg) + .addUse(ValReg) + .addUse(IdxReg); + MI.eraseFromParent(); + return true; +} + static Register convertPtrToInt(Register Reg, LLT ConvTy, SPIRVType *SpvType, LegalizerHelper &Helper, MachineRegisterInfo &MRI, @@ -486,6 +543,8 @@ bool SPIRVLegalizerInfo::legalizeCustom( return legalizeBitcast(Helper, MI); case TargetOpcode::G_EXTRACT_VECTOR_ELT: return legalizeExtractVectorElt(Helper, MI, GR); + case TargetOpcode::G_INSERT_VECTOR_ELT: + return legalizeInsertVectorElt(Helper, MI, GR); case TargetOpcode::G_INTRINSIC: case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: return legalizeIntrinsic(Helper, MI); @@ -515,6 +574,15 @@ bool SPIRVLegalizerInfo::legalizeCustom( } } +static bool needsVectorLegalization(const LLT &Ty, const SPIRVSubtarget &ST) { + if (!Ty.isVector()) + return false; + unsigned NumElements = Ty.getNumElements(); + unsigned MaxVectorSize = ST.isShader() ? 4 : 16; + return (NumElements > 4 && !isPowerOf2_32(NumElements)) || + NumElements > MaxVectorSize; +} + bool SPIRVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, MachineInstr &MI) const { LLVM_DEBUG(dbgs() << "legalizeIntrinsic: " << MI); @@ -531,41 +599,38 @@ bool SPIRVLegalizerInfo::legalizeIntrinsic(LegalizerHelper &Helper, LLT DstTy = MRI.getType(DstReg); LLT SrcTy = MRI.getType(SrcReg); - int32_t MaxVectorSize = ST.isShader() ? 4 : 16; - - bool DstNeedsLegalization = false; - bool SrcNeedsLegalization = false; - - if (DstTy.isVector()) { - if (DstTy.getNumElements() > 4 && - !isPowerOf2_32(DstTy.getNumElements())) { - DstNeedsLegalization = true; - } - - if (DstTy.getNumElements() > MaxVectorSize) { - DstNeedsLegalization = true; - } - } - - if (SrcTy.isVector()) { - if (SrcTy.getNumElements() > 4 && - !isPowerOf2_32(SrcTy.getNumElements())) { - SrcNeedsLegalization = true; - } - - if (SrcTy.getNumElements() > MaxVectorSize) { - SrcNeedsLegalization = true; - } - } - // If an spv_bitcast needs to be legalized, we convert it to G_BITCAST to // allow using the generic legalization rules. 
- if (DstNeedsLegalization || SrcNeedsLegalization) { + if (needsVectorLegalization(DstTy, ST) || + needsVectorLegalization(SrcTy, ST)) { LLVM_DEBUG(dbgs() << "Replacing with a G_BITCAST\n"); MIRBuilder.buildBitcast(DstReg, SrcReg); MI.eraseFromParent(); } return true; + } else if (IntrinsicID == Intrinsic::spv_insertelt) { + Register DstReg = MI.getOperand(0).getReg(); + LLT DstTy = MRI.getType(DstReg); + + if (needsVectorLegalization(DstTy, ST)) { + Register SrcReg = MI.getOperand(2).getReg(); + Register ValReg = MI.getOperand(3).getReg(); + Register IdxReg = MI.getOperand(4).getReg(); + MIRBuilder.buildInsertVectorElement(DstReg, SrcReg, ValReg, IdxReg); + MI.eraseFromParent(); + } + return true; + } else if (IntrinsicID == Intrinsic::spv_extractelt) { + Register SrcReg = MI.getOperand(2).getReg(); + LLT SrcTy = MRI.getType(SrcReg); + + if (needsVectorLegalization(SrcTy, ST)) { + Register DstReg = MI.getOperand(0).getReg(); + Register IdxReg = MI.getOperand(3).getReg(); + MIRBuilder.buildExtractVectorElement(DstReg, SrcReg, IdxReg); + MI.eraseFromParent(); + } + return true; } return true; } diff --git a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp index c90e6d8cfbfb4..99edb937c3daa 100644 --- a/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVPostLegalizer.cpp @@ -16,6 +16,7 @@ #include "SPIRV.h" #include "SPIRVSubtarget.h" #include "SPIRVUtils.h" +#include "llvm/CodeGen/GlobalISel/GenericMachineInstrs.h" #include "llvm/IR/IntrinsicsSPIRV.h" #include "llvm/Support/Debug.h" #include @@ -66,8 +67,9 @@ static bool deduceAndAssignTypeForGUnmerge(MachineInstr *I, MachineFunction &MF, for (unsigned i = 0; i < I->getNumDefs() && !ScalarType; ++i) { for (const auto &Use : MRI.use_nodbg_instructions(I->getOperand(i).getReg())) { - assert(Use.getOpcode() == TargetOpcode::G_BUILD_VECTOR && - "Expected use of G_UNMERGE_VALUES to be a G_BUILD_VECTOR"); + if (Use.getOpcode() != TargetOpcode::G_BUILD_VECTOR) + continue; + if (auto *VecType = GR->getSPIRVTypeForVReg(Use.getOperand(0).getReg())) { ScalarType = GR->getScalarOrVectorComponentType(VecType); @@ -133,10 +135,10 @@ static SPIRVType *deduceTypeFromOperandRange(MachineInstr *I, return ResType; } -static SPIRVType *deduceTypeForResultRegister(MachineInstr *Use, - Register UseRegister, - SPIRVGlobalRegistry *GR, - MachineIRBuilder &MIB) { +static SPIRVType *deduceTypeFromResultRegister(MachineInstr *Use, + Register UseRegister, + SPIRVGlobalRegistry *GR, + MachineIRBuilder &MIB) { for (const MachineOperand &MO : Use->defs()) { if (!MO.isReg()) continue; @@ -159,16 +161,44 @@ static SPIRVType *deduceTypeFromUses(Register Reg, MachineFunction &MF, MachineRegisterInfo &MRI = MF.getRegInfo(); for (MachineInstr &Use : MRI.use_nodbg_instructions(Reg)) { SPIRVType *ResType = nullptr; + LLVM_DEBUG(dbgs() << "Looking at use " << Use); switch (Use.getOpcode()) { case TargetOpcode::G_BUILD_VECTOR: case TargetOpcode::G_EXTRACT_VECTOR_ELT: case TargetOpcode::G_UNMERGE_VALUES: - LLVM_DEBUG(dbgs() << "Looking at use " << Use << "\n"); - ResType = deduceTypeForResultRegister(&Use, Reg, GR, MIB); + case TargetOpcode::G_ADD: + case TargetOpcode::G_SUB: + case TargetOpcode::G_MUL: + case TargetOpcode::G_SDIV: + case TargetOpcode::G_UDIV: + case TargetOpcode::G_SREM: + case TargetOpcode::G_UREM: + case TargetOpcode::G_FADD: + case TargetOpcode::G_FSUB: + case TargetOpcode::G_FMUL: + case TargetOpcode::G_FDIV: + case TargetOpcode::G_FREM: + case TargetOpcode::G_FMA: + case 
TargetOpcode::G_STRICT_FMA: + ResType = deduceTypeFromResultRegister(&Use, Reg, GR, MIB); break; + case TargetOpcode::G_INTRINSIC_W_SIDE_EFFECTS: + case TargetOpcode::G_INTRINSIC: { + auto IntrinsicID = cast<GIntrinsic>(Use).getIntrinsicID(); + if (IntrinsicID == Intrinsic::spv_insertelt) { + if (Reg == Use.getOperand(2).getReg()) + ResType = deduceTypeFromResultRegister(&Use, Reg, GR, MIB); + } else if (IntrinsicID == Intrinsic::spv_extractelt) { + if (Reg == Use.getOperand(2).getReg()) + ResType = deduceTypeFromResultRegister(&Use, Reg, GR, MIB); + } + break; + } } - if (ResType) + if (ResType) { + LLVM_DEBUG(dbgs() << "Deduced type from use " << *ResType); return ResType; + } } return nullptr; } @@ -296,20 +326,25 @@ static void registerSpirvTypeForNewInstructions(MachineFunction &MF, for (auto *I : Worklist) { MachineIRBuilder MIB(*I); - Register ResVReg = I->getOperand(0).getReg(); - const LLT &ResLLT = MRI.getType(ResVReg); - SPIRVType *ResType = nullptr; - if (ResLLT.isVector()) { - SPIRVType *CompType = GR->getOrCreateSPIRVIntegerType( - ResLLT.getElementType().getSizeInBits(), MIB); - ResType = GR->getOrCreateSPIRVVectorType( - CompType, ResLLT.getNumElements(), MIB, false); - } else { - ResType = GR->getOrCreateSPIRVIntegerType(ResLLT.getSizeInBits(), MIB); + for (unsigned Idx = 0; Idx < I->getNumDefs(); ++Idx) { + Register ResVReg = I->getOperand(Idx).getReg(); + if (GR->getSPIRVTypeForVReg(ResVReg)) + continue; + const LLT &ResLLT = MRI.getType(ResVReg); + SPIRVType *ResType = nullptr; + if (ResLLT.isVector()) { + SPIRVType *CompType = GR->getOrCreateSPIRVIntegerType( + ResLLT.getElementType().getSizeInBits(), MIB); + ResType = GR->getOrCreateSPIRVVectorType( + CompType, ResLLT.getNumElements(), MIB, false); + } else { + ResType = GR->getOrCreateSPIRVIntegerType(ResLLT.getSizeInBits(), MIB); + } + LLVM_DEBUG(dbgs() << "Could not determine type for " << ResVReg + << ", defaulting to " << *ResType << "\n"); + + setRegClassType(ResVReg, ResType, GR, &MRI, MF, true); } - LLVM_DEBUG(dbgs() << "Could not determine type for " << *I - << ", defaulting to " << *ResType << "\n"); - setRegClassType(ResVReg, ResType, GR, &MRI, MF, true); } } diff --git a/llvm/test/CodeGen/SPIRV/legalization/load-store-global.ll b/llvm/test/CodeGen/SPIRV/legalization/load-store-global.ll new file mode 100644 index 0000000000000..19b39ff59809a --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/legalization/load-store-global.ll @@ -0,0 +1,218 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: %[[#int:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#v4i32:]] = OpTypeVector %[[#int]] 4 +; CHECK-DAG: %[[#double:]] = OpTypeFloat 64 +; CHECK-DAG: %[[#v4f64:]] = OpTypeVector %[[#double]] 4 +; CHECK-DAG: %[[#v2i32:]] = OpTypeVector %[[#int]] 2 +; CHECK-DAG: %[[#ptr_private_v4i32:]] = OpTypePointer Private %[[#v4i32]] +; CHECK-DAG: %[[#ptr_private_v4f64:]] = OpTypePointer Private %[[#v4f64]] +; CHECK-DAG: %[[#global_double:]] = OpVariable %[[#ptr_private_v4f64]] Private +; CHECK-DAG: %[[#C15:]] = OpConstant %[[#int]] 15{{$}} +; CHECK-DAG: %[[#C14:]] = OpConstant %[[#int]] 14{{$}} +; CHECK-DAG: %[[#C13:]] = OpConstant %[[#int]] 13{{$}} +; CHECK-DAG: %[[#C12:]] = OpConstant %[[#int]] 12{{$}} +; CHECK-DAG: %[[#C11:]] = OpConstant %[[#int]] 11{{$}} +; CHECK-DAG: %[[#C10:]] = OpConstant %[[#int]] 10{{$}} +; CHECK-DAG: %[[#C9:]] = OpConstant %[[#int]] 9{{$}} +; CHECK-DAG: %[[#C8:]] = OpConstant 
%[[#int]] 8{{$}} +; CHECK-DAG: %[[#C7:]] = OpConstant %[[#int]] 7{{$}} +; CHECK-DAG: %[[#C6:]] = OpConstant %[[#int]] 6{{$}} +; CHECK-DAG: %[[#C5:]] = OpConstant %[[#int]] 5{{$}} +; CHECK-DAG: %[[#C4:]] = OpConstant %[[#int]] 4{{$}} +; CHECK-DAG: %[[#C3:]] = OpConstant %[[#int]] 3{{$}} +; CHECK-DAG: %[[#C2:]] = OpConstant %[[#int]] 2{{$}} +; CHECK-DAG: %[[#C1:]] = OpConstant %[[#int]] 1{{$}} +; CHECK-DAG: %[[#C0:]] = OpConstant %[[#int]] 0{{$}} + +@G_16 = internal addrspace(10) global [16 x i32] zeroinitializer +@G_4_double = internal addrspace(10) global <4 x double> zeroinitializer +@G_4_int = internal addrspace(10) global <4 x i32> zeroinitializer + + +; This is the way matrices will be represented in HLSL. The memory type will be +; an array, but it will be loaded as a vector. +define spir_func void @test_load_store_global() { +entry: +; CHECK-DAG: %[[#PTR0:]] = OpAccessChain %[[#ptr_int:]] %[[#G16:]] %[[#C0]] +; CHECK-DAG: %[[#VAL0:]] = OpLoad %[[#int]] %[[#PTR0]] Aligned 4 +; CHECK-DAG: %[[#PTR1:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C1]] +; CHECK-DAG: %[[#VAL1:]] = OpLoad %[[#int]] %[[#PTR1]] Aligned 4 +; CHECK-DAG: %[[#PTR2:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C2]] +; CHECK-DAG: %[[#VAL2:]] = OpLoad %[[#int]] %[[#PTR2]] Aligned 4 +; CHECK-DAG: %[[#PTR3:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C3]] +; CHECK-DAG: %[[#VAL3:]] = OpLoad %[[#int]] %[[#PTR3]] Aligned 4 +; CHECK-DAG: %[[#PTR4:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C4]] +; CHECK-DAG: %[[#VAL4:]] = OpLoad %[[#int]] %[[#PTR4]] Aligned 4 +; CHECK-DAG: %[[#PTR5:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C5]] +; CHECK-DAG: %[[#VAL5:]] = OpLoad %[[#int]] %[[#PTR5]] Aligned 4 +; CHECK-DAG: %[[#PTR6:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C6]] +; CHECK-DAG: %[[#VAL6:]] = OpLoad %[[#int]] %[[#PTR6]] Aligned 4 +; CHECK-DAG: %[[#PTR7:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C7]] +; CHECK-DAG: %[[#VAL7:]] = OpLoad %[[#int]] %[[#PTR7]] Aligned 4 +; CHECK-DAG: %[[#PTR8:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C8]] +; CHECK-DAG: %[[#VAL8:]] = OpLoad %[[#int]] %[[#PTR8]] Aligned 4 +; CHECK-DAG: %[[#PTR9:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C9]] +; CHECK-DAG: %[[#VAL9:]] = OpLoad %[[#int]] %[[#PTR9]] Aligned 4 +; CHECK-DAG: %[[#PTR10:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C10]] +; CHECK-DAG: %[[#VAL10:]] = OpLoad %[[#int]] %[[#PTR10]] Aligned 4 +; CHECK-DAG: %[[#PTR11:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C11]] +; CHECK-DAG: %[[#VAL11:]] = OpLoad %[[#int]] %[[#PTR11]] Aligned 4 +; CHECK-DAG: %[[#PTR12:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C12]] +; CHECK-DAG: %[[#VAL12:]] = OpLoad %[[#int]] %[[#PTR12]] Aligned 4 +; CHECK-DAG: %[[#PTR13:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C13]] +; CHECK-DAG: %[[#VAL13:]] = OpLoad %[[#int]] %[[#PTR13]] Aligned 4 +; CHECK-DAG: %[[#PTR14:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C14]] +; CHECK-DAG: %[[#VAL14:]] = OpLoad %[[#int]] %[[#PTR14]] Aligned 4 +; CHECK-DAG: %[[#PTR15:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C15]] +; CHECK-DAG: %[[#VAL15:]] = OpLoad %[[#int]] %[[#PTR15]] Aligned 4 +; CHECK-DAG: %[[#INS0:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL0]] %[[#UNDEF:]] 0 +; CHECK-DAG: %[[#INS1:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL1]] %[[#INS0]] 1 +; CHECK-DAG: %[[#INS2:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL2]] %[[#INS1]] 2 +; CHECK-DAG: %[[#INS3:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL3]] %[[#INS2]] 3 +; CHECK-DAG: %[[#INS4:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL4]] %[[#UNDEF]] 0 +; 
CHECK-DAG: %[[#INS5:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL5]] %[[#INS4]] 1 +; CHECK-DAG: %[[#INS6:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL6]] %[[#INS5]] 2 +; CHECK-DAG: %[[#INS7:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL7]] %[[#INS6]] 3 +; CHECK-DAG: %[[#INS8:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL8]] %[[#UNDEF]] 0 +; CHECK-DAG: %[[#INS9:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL9]] %[[#INS8]] 1 +; CHECK-DAG: %[[#INS10:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL10]] %[[#INS9]] 2 +; CHECK-DAG: %[[#INS11:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL11]] %[[#INS10]] 3 +; CHECK-DAG: %[[#INS12:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL12]] %[[#UNDEF]] 0 +; CHECK-DAG: %[[#INS13:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL13]] %[[#INS12]] 1 +; CHECK-DAG: %[[#INS14:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL14]] %[[#INS13]] 2 +; CHECK-DAG: %[[#INS15:]] = OpCompositeInsert %[[#v4i32]] %[[#VAL15]] %[[#INS14]] 3 + %0 = load <16 x i32>, ptr addrspace(10) @G_16, align 64 + +; CHECK-DAG: %[[#PTR0_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C0]] +; CHECK-DAG: %[[#VAL0_S:]] = OpCompositeExtract %[[#int]] %[[#INS3]] 0 +; CHECK-DAG: OpStore %[[#PTR0_S]] %[[#VAL0_S]] Aligned 64 +; CHECK-DAG: %[[#PTR1_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C1]] +; CHECK-DAG: %[[#VAL1_S:]] = OpCompositeExtract %[[#int]] %[[#INS3]] 1 +; CHECK-DAG: OpStore %[[#PTR1_S]] %[[#VAL1_S]] Aligned 4 +; CHECK-DAG: %[[#PTR2_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C2]] +; CHECK-DAG: %[[#VAL2_S:]] = OpCompositeExtract %[[#int]] %[[#INS3]] 2 +; CHECK-DAG: OpStore %[[#PTR2_S]] %[[#VAL2_S]] Aligned 8 +; CHECK-DAG: %[[#PTR3_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C3]] +; CHECK-DAG: %[[#VAL3_S:]] = OpCompositeExtract %[[#int]] %[[#INS3]] 3 +; CHECK-DAG: OpStore %[[#PTR3_S]] %[[#VAL3_S]] Aligned 4 +; CHECK-DAG: %[[#PTR4_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C4]] +; CHECK-DAG: %[[#VAL4_S:]] = OpCompositeExtract %[[#int]] %[[#INS7]] 0 +; CHECK-DAG: OpStore %[[#PTR4_S]] %[[#VAL4_S]] Aligned 16 +; CHECK-DAG: %[[#PTR5_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C5]] +; CHECK-DAG: %[[#VAL5_S:]] = OpCompositeExtract %[[#int]] %[[#INS7]] 1 +; CHECK-DAG: OpStore %[[#PTR5_S]] %[[#VAL5_S]] Aligned 4 +; CHECK-DAG: %[[#PTR6_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C6]] +; CHECK-DAG: %[[#VAL6_S:]] = OpCompositeExtract %[[#int]] %[[#INS7]] 2 +; CHECK-DAG: OpStore %[[#PTR6_S]] %[[#VAL6_S]] Aligned 8 +; CHECK-DAG: %[[#PTR7_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C7]] +; CHECK-DAG: %[[#VAL7_S:]] = OpCompositeExtract %[[#int]] %[[#INS7]] 3 +; CHECK-DAG: OpStore %[[#PTR7_S]] %[[#VAL7_S]] Aligned 4 +; CHECK-DAG: %[[#PTR8_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C8]] +; CHECK-DAG: %[[#VAL8_S:]] = OpCompositeExtract %[[#int]] %[[#INS11]] 0 +; CHECK-DAG: OpStore %[[#PTR8_S]] %[[#VAL8_S]] Aligned 32 +; CHECK-DAG: %[[#PTR9_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C9]] +; CHECK-DAG: %[[#VAL9_S:]] = OpCompositeExtract %[[#int]] %[[#INS11]] 1 +; CHECK-DAG: OpStore %[[#PTR9_S]] %[[#VAL9_S]] Aligned 4 +; CHECK-DAG: %[[#PTR10_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C10]] +; CHECK-DAG: %[[#VAL10_S:]] = OpCompositeExtract %[[#int]] %[[#INS11]] 2 +; CHECK-DAG: OpStore %[[#PTR10_S]] %[[#VAL10_S]] Aligned 8 +; CHECK-DAG: %[[#PTR11_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C11]] +; CHECK-DAG: %[[#VAL11_S:]] = OpCompositeExtract %[[#int]] %[[#INS11]] 3 +; CHECK-DAG: OpStore %[[#PTR11_S]] %[[#VAL11_S]] Aligned 4 +; CHECK-DAG: %[[#PTR12_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] 
%[[#C12]] +; CHECK-DAG: %[[#VAL12_S:]] = OpCompositeExtract %[[#int]] %[[#INS15]] 0 +; CHECK-DAG: OpStore %[[#PTR12_S]] %[[#VAL12_S]] Aligned 16 +; CHECK-DAG: %[[#PTR13_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C13]] +; CHECK-DAG: %[[#VAL13_S:]] = OpCompositeExtract %[[#int]] %[[#INS15]] 1 +; CHECK-DAG: OpStore %[[#PTR13_S]] %[[#VAL13_S]] Aligned 4 +; CHECK-DAG: %[[#PTR14_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C14]] +; CHECK-DAG: %[[#VAL14_S:]] = OpCompositeExtract %[[#int]] %[[#INS15]] 2 +; CHECK-DAG: OpStore %[[#PTR14_S]] %[[#VAL14_S]] Aligned 8 +; CHECK-DAG: %[[#PTR15_S:]] = OpAccessChain %[[#ptr_int]] %[[#G16]] %[[#C15]] +; CHECK-DAG: %[[#VAL15_S:]] = OpCompositeExtract %[[#int]] %[[#INS15]] 3 +; CHECK-DAG: OpStore %[[#PTR15_S]] %[[#VAL15_S]] Aligned 4 + store <16 x i32> %0, ptr addrspace(10) @G_16, align 64 + ret void +} + +define spir_func void @test_int32_double_conversion() { +; CHECK: OpFunction +entry: + ; CHECK: %[[#LOAD:]] = OpLoad %[[#v4f64]] %[[#global_double]] + ; CHECK: %[[#VEC_SHUF1:]] = OpVectorShuffle %{{[a-zA-Z0-9_]+}} %[[#LOAD]] %{{[a-zA-Z0-9_]+}} 0 1 + ; CHECK: %[[#VEC_SHUF2:]] = OpVectorShuffle %{{[a-zA-Z0-9_]+}} %[[#LOAD]] %{{[a-zA-Z0-9_]+}} 2 3 + ; CHECK: %[[#BITCAST1:]] = OpBitcast %[[#v4i32]] %[[#VEC_SHUF1]] + ; CHECK: %[[#BITCAST2:]] = OpBitcast %[[#v4i32]] %[[#VEC_SHUF2]] + %0 = load <8 x i32>, ptr addrspace(10) @G_4_double + + ; CHECK: %[[#EXTRACT1:]] = OpCompositeExtract %[[#int]] %[[#BITCAST1]] 0 + ; CHECK: %[[#EXTRACT2:]] = OpCompositeExtract %[[#int]] %[[#BITCAST1]] 2 + ; CHECK: %[[#EXTRACT3:]] = OpCompositeExtract %[[#int]] %[[#BITCAST2]] 0 + ; CHECK: %[[#EXTRACT4:]] = OpCompositeExtract %[[#int]] %[[#BITCAST2]] 2 + ; CHECK: %[[#CONSTRUCT1:]] = OpCompositeConstruct %[[#v4i32]] %[[#EXTRACT1]] %[[#EXTRACT2]] %[[#EXTRACT3]] %[[#EXTRACT4]] + %1 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> + + ; CHECK: %[[#EXTRACT5:]] = OpCompositeExtract %[[#int]] %[[#BITCAST1]] 1 + ; CHECK: %[[#EXTRACT6:]] = OpCompositeExtract %[[#int]] %[[#BITCAST1]] 3 + ; CHECK: %[[#EXTRACT7:]] = OpCompositeExtract %[[#int]] %[[#BITCAST2]] 1 + ; CHECK: %[[#EXTRACT8:]] = OpCompositeExtract %[[#int]] %[[#BITCAST2]] 3 + ; CHECK: %[[#CONSTRUCT2:]] = OpCompositeConstruct %[[#v4i32]] %[[#EXTRACT5]] %[[#EXTRACT6]] %[[#EXTRACT7]] %[[#EXTRACT8]] + %2 = shufflevector <8 x i32> %0, <8 x i32> poison, <4 x i32> + + ; CHECK: %[[#EXTRACT9:]] = OpCompositeExtract %[[#int]] %[[#CONSTRUCT1]] 0 + ; CHECK: %[[#EXTRACT10:]] = OpCompositeExtract %[[#int]] %[[#CONSTRUCT2]] 0 + ; CHECK: %[[#EXTRACT11:]] = OpCompositeExtract %[[#int]] %[[#CONSTRUCT1]] 1 + ; CHECK: %[[#EXTRACT12:]] = OpCompositeExtract %[[#int]] %[[#CONSTRUCT2]] 1 + ; CHECK: %[[#EXTRACT13:]] = OpCompositeExtract %[[#int]] %[[#CONSTRUCT1]] 2 + ; CHECK: %[[#EXTRACT14:]] = OpCompositeExtract %[[#int]] %[[#CONSTRUCT2]] 2 + ; CHECK: %[[#EXTRACT15:]] = OpCompositeExtract %[[#int]] %[[#CONSTRUCT1]] 3 + ; CHECK: %[[#EXTRACT16:]] = OpCompositeExtract %[[#int]] %[[#CONSTRUCT2]] 3 + ; CHECK: %[[#CONSTRUCT3:]] = OpCompositeConstruct %[[#v2i32]] %[[#EXTRACT9]] %[[#EXTRACT10]] + ; CHECK: %[[#CONSTRUCT4:]] = OpCompositeConstruct %[[#v2i32]] %[[#EXTRACT11]] %[[#EXTRACT12]] + ; CHECK: %[[#CONSTRUCT5:]] = OpCompositeConstruct %[[#v2i32]] %[[#EXTRACT13]] %[[#EXTRACT14]] + ; CHECK: %[[#CONSTRUCT6:]] = OpCompositeConstruct %[[#v2i32]] %[[#EXTRACT15]] %[[#EXTRACT16]] + %3 = shufflevector <4 x i32> %1, <4 x i32> %2, <8 x i32> + + ; CHECK: %[[#BITCAST3:]] = OpBitcast %[[#double]] %[[#CONSTRUCT3]] + ; CHECK: %[[#BITCAST4:]] = OpBitcast 
%[[#double]] %[[#CONSTRUCT4]] + ; CHECK: %[[#BITCAST5:]] = OpBitcast %[[#double]] %[[#CONSTRUCT5]] + ; CHECK: %[[#BITCAST6:]] = OpBitcast %[[#double]] %[[#CONSTRUCT6]] + ; CHECK: %[[#CONSTRUCT7:]] = OpCompositeConstruct %[[#v4f64]] %[[#BITCAST3]] %[[#BITCAST4]] %[[#BITCAST5]] %[[#BITCAST6]] + ; CHECK: OpStore %[[#global_double]] %[[#CONSTRUCT7]] Aligned 32 + store <8 x i32> %3, ptr addrspace(10) @G_4_double + ret void +} + +; CHECK: OpFunction +define spir_func void @test_double_to_int_implicit_conversion() { +entry: + +; CHECK: %[[#LOAD_V4F64:]] = OpLoad %[[#V4F64_TYPE:]] %[[#GLOBAL_DOUBLE_VAR:]] Aligned 32 +; CHECK: %[[#VEC_SHUF_01:]] = OpVectorShuffle %[[#V2F64_TYPE:]] %[[#LOAD_V4F64]] %[[#UNDEF_V2F64:]] 0 1 +; CHECK: %[[#VEC_SHUF_23:]] = OpVectorShuffle %[[#V2F64_TYPE:]] %[[#LOAD_V4F64]] %[[#UNDEF_V2F64]] 2 3 +; CHECK: %[[#BITCAST_V4I32_01:]] = OpBitcast %[[#V4I32_TYPE:]] %[[#VEC_SHUF_01]] +; CHECK: %[[#BITCAST_V4I32_23:]] = OpBitcast %[[#V4I32_TYPE]] %[[#VEC_SHUF_23]] + %0 = load <8 x i32>, ptr addrspace(10) @G_4_double, align 64 + +; CHECK: %[[#VEC_SHUF_0_0:]] = OpVectorShuffle %[[#V2I32_TYPE:]] %[[#BITCAST_V4I32_01]] %[[#UNDEF_V2I32:]] 0 1 +; CHECK: %[[#VEC_SHUF_0_1:]] = OpVectorShuffle %[[#V2I32_TYPE]] %[[#BITCAST_V4I32_01]] %[[#UNDEF_V2I32]] 2 3 +; CHECK: %[[#VEC_SHUF_1_0:]] = OpVectorShuffle %[[#V2I32_TYPE]] %[[#BITCAST_V4I32_23]] %[[#UNDEF_V2I32]] 0 1 +; CHECK: %[[#VEC_SHUF_1_1:]] = OpVectorShuffle %[[#V2I32_TYPE]] %[[#BITCAST_V4I32_23]] %[[#UNDEF_V2I32]] 2 3 +; CHECK: %[[#BITCAST_DOUBLE_0_0:]] = OpBitcast %[[#DOUBLE_TYPE:]] %[[#VEC_SHUF_0_0]] +; CHECK: %[[#BITCAST_DOUBLE_0_1:]] = OpBitcast %[[#DOUBLE_TYPE]] %[[#VEC_SHUF_0_1]] +; CHECK: %[[#BITCAST_DOUBLE_1_0:]] = OpBitcast %[[#DOUBLE_TYPE]] %[[#VEC_SHUF_1_0]] +; CHECK: %[[#BITCAST_DOUBLE_1_1:]] = OpBitcast %[[#DOUBLE_TYPE]] %[[#VEC_SHUF_1_1]] +; CHECK: %[[#COMPOSITE_CONSTRUCT:]] = OpCompositeConstruct %[[#V4F64_TYPE]] %[[#BITCAST_DOUBLE_0_0]] %[[#BITCAST_DOUBLE_0_1]] %[[#BITCAST_DOUBLE_1_0]] %[[#BITCAST_DOUBLE_1_1]] +; CHECK: OpStore %[[#GLOBAL_DOUBLE_VAR]] %[[#COMPOSITE_CONSTRUCT]] Aligned 64 + store <8 x i32> %0, ptr addrspace(10) @G_4_double, align 64 + ret void +} + +; Add a main function to make it a valid module for spirv-val +define void @main() #1 { + ret void +} + +attributes #1 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } diff --git a/llvm/test/CodeGen/SPIRV/legalization/vector-arithmetic-6.ll b/llvm/test/CodeGen/SPIRV/legalization/vector-arithmetic-6.ll new file mode 100644 index 0000000000000..d1cbfd4811c30 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/legalization/vector-arithmetic-6.ll @@ -0,0 +1,224 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val %} + +; CHECK-DAG: OpName %[[#main:]] "main" +; CHECK-DAG: %[[#float:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#v4f32:]] = OpTypeVector %[[#float]] 4 +; CHECK-DAG: %[[#int:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#c6:]] = OpConstant %[[#int]] 6 +; CHECK-DAG: %[[#v6f32:]] = OpTypeArray %[[#float]] %[[#c6]] +; CHECK-DAG: %[[#v6i32:]] = OpTypeArray %[[#int]] %[[#c6]] +; CHECK-DAG: %[[#ptr_ssbo_v6i32:]] = OpTypePointer Private %[[#v6i32]] +; CHECK-DAG: %[[#v4i32:]] = OpTypeVector %[[#int]] 4 +; CHECK-DAG: %[[#UNDEF:]] = OpUndef %[[#int]] + +@f1 = internal addrspace(10) global [4 x [6 x float] ] zeroinitializer +@f2 = internal addrspace(10) global [4 x [6 x float] ] zeroinitializer +@i1 = internal addrspace(10) global [4 x [6 x i32] 
] zeroinitializer +@i2 = internal addrspace(10) global [4 x [6 x i32] ] zeroinitializer + +define void @main() local_unnamed_addr #0 { +; CHECK: %[[#main]] = OpFunction +entry: + %2 = getelementptr [4 x [6 x float] ], ptr addrspace(10) @f1, i32 0, i32 0 + %3 = load <6 x float>, ptr addrspace(10) %2, align 4 + %4 = getelementptr [4 x [6 x float] ], ptr addrspace(10) @f1, i32 0, i32 1 + %5 = load <6 x float>, ptr addrspace(10) %4, align 4 + %6 = getelementptr [4 x [6 x float] ], ptr addrspace(10) @f1, i32 0, i32 2 + %7 = load <6 x float>, ptr addrspace(10) %6, align 4 + %8 = getelementptr [4 x [6 x float] ], ptr addrspace(10) @f1, i32 0, i32 3 + %9 = load <6 x float>, ptr addrspace(10) %8, align 4 + + ; We expect the 6-element vectors to be widened to 8, then split into two vectors of size 4. + ; CHECK: %[[#Mul1:]] = OpFMul %[[#v4f32]] + ; CHECK: %[[#Mul2:]] = OpFMul %[[#v4f32]] + %10 = fmul reassoc nnan ninf nsz arcp afn <6 x float> %3, splat (float 3.000000e+00) + + ; CHECK: %[[#Add1:]] = OpFAdd %[[#v4f32]] %[[#Mul1]] + ; CHECK: %[[#Add2:]] = OpFAdd %[[#v4f32]] %[[#Mul2]] + %11 = fadd reassoc nnan ninf nsz arcp afn <6 x float> %10, %5 + + ; CHECK: %[[#Sub1:]] = OpFSub %[[#v4f32]] %[[#Add1]] + ; CHECK: %[[#Sub2:]] = OpFSub %[[#v4f32]] %[[#Add2]] + %13 = fsub reassoc nnan ninf nsz arcp afn <6 x float> %11, %9 + + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub1]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub1]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub1]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub1]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub2]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub2]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + + %14 = getelementptr [4 x [6 x float] ], ptr addrspace(10) @f2, i32 0, i32 0 + store <6 x float> %13, ptr addrspace(10) %14, align 4 + ret void +} + +; Test integer vector arithmetic operations +define void @test_int_vector_arithmetic() local_unnamed_addr #0 { +; CHECK: OpFunction +entry: + %2 = getelementptr [4 x [6 x i32] ], ptr addrspace(10) @i1, i32 0, i32 0 + %3 = load <6 x i32>, ptr addrspace(10) %2, align 4 + %4 = getelementptr [4 x [6 x i32] ], ptr addrspace(10) @i1, i32 0, i32 1 + %5 = load <6 x i32>, ptr addrspace(10) %4, align 4 + + ; CHECK: %[[#Add1:]] = OpIAdd %[[#v4i32]] + ; CHECK: %[[#Add2:]] = OpIAdd %[[#v4i32]] + %6 = add <6 x i32> %3, %5 + + ; CHECK: %[[#Sub1:]] = OpISub %[[#v4i32]] %[[#Add1]] + ; CHECK: %[[#Sub2:]] = OpISub %[[#v4i32]] %[[#Add2]] + %7 = sub <6 x i32> %6, %5 + + ; CHECK: %[[#Mul1:]] = OpIMul %[[#v4i32]] %[[#Sub1]] + ; CHECK: %[[#Mul2:]] = OpIMul %[[#v4i32]] %[[#Sub2]] + %8 = mul <6 x i32> %7, splat (i32 2) + + ; CHECK-DAG: %[[#E1:]] = OpCompositeExtract %[[#int]] %[[#Mul1]] 0 + ; CHECK-DAG: %[[#E2:]] = OpCompositeExtract %[[#int]] %[[#Mul1]] 1 + ; CHECK-DAG: %[[#E3:]] = OpCompositeExtract %[[#int]] %[[#Mul1]] 2 + ; CHECK-DAG: %[[#E4:]] = OpCompositeExtract %[[#int]] %[[#Mul1]] 3 + ; CHECK-DAG: %[[#E5:]] = OpCompositeExtract %[[#int]] %[[#Mul2]] 0 + ; CHECK-DAG: %[[#E6:]] = OpCompositeExtract %[[#int]] %[[#Mul2]] 1 + ; CHECK: %[[#SDiv1:]] = OpSDiv %[[#int]] %[[#E1]] + ; CHECK: %[[#SDiv2:]] = OpSDiv %[[#int]] %[[#E2]] + ; CHECK: %[[#SDiv3:]] = OpSDiv %[[#int]] %[[#E3]] + ; CHECK: 
%[[#SDiv4:]] = OpSDiv %[[#int]] %[[#E4]] + ; CHECK: %[[#SDiv5:]] = OpSDiv %[[#int]] %[[#E5]] + ; CHECK: %[[#SDiv6:]] = OpSDiv %[[#int]] %[[#E6]] + %9 = sdiv <6 x i32> %8, splat (i32 2) + + ; CHECK: %[[#UDiv1:]] = OpUDiv %[[#int]] %[[#SDiv1]] + ; CHECK: %[[#UDiv2:]] = OpUDiv %[[#int]] %[[#SDiv2]] + ; CHECK: %[[#UDiv3:]] = OpUDiv %[[#int]] %[[#SDiv3]] + ; CHECK: %[[#UDiv4:]] = OpUDiv %[[#int]] %[[#SDiv4]] + ; CHECK: %[[#UDiv5:]] = OpUDiv %[[#int]] %[[#SDiv5]] + ; CHECK: %[[#UDiv6:]] = OpUDiv %[[#int]] %[[#SDiv6]] + %10 = udiv <6 x i32> %9, splat (i32 1) + + ; CHECK: %[[#SRem1:]] = OpSRem %[[#int]] %[[#UDiv1]] + ; CHECK: %[[#SRem2:]] = OpSRem %[[#int]] %[[#UDiv2]] + ; CHECK: %[[#SRem3:]] = OpSRem %[[#int]] %[[#UDiv3]] + ; CHECK: %[[#SRem4:]] = OpSRem %[[#int]] %[[#UDiv4]] + ; CHECK: %[[#SRem5:]] = OpSRem %[[#int]] %[[#UDiv5]] + ; CHECK: %[[#SRem6:]] = OpSRem %[[#int]] %[[#UDiv6]] + %11 = srem <6 x i32> %10, splat (i32 3) + + ; CHECK: %[[#UMod1:]] = OpUMod %[[#int]] %[[#SRem1]] + ; CHECK: %[[#UMod2:]] = OpUMod %[[#int]] %[[#SRem2]] + ; CHECK: %[[#UMod3:]] = OpUMod %[[#int]] %[[#SRem3]] + ; CHECK: %[[#UMod4:]] = OpUMod %[[#int]] %[[#SRem4]] + ; CHECK: %[[#UMod5:]] = OpUMod %[[#int]] %[[#SRem5]] + ; CHECK: %[[#UMod6:]] = OpUMod %[[#int]] %[[#SRem6]] + %12 = urem <6 x i32> %11, splat (i32 3) + + ; CHECK: %[[#Construct1:]] = OpCompositeConstruct %[[#v4i32]] %[[#UMod1]] %[[#UMod2]] %[[#UMod3]] %[[#UMod4]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#Construct1]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#Construct2:]] = OpCompositeConstruct %[[#v4i32]] %[[#UMod1]] %[[#UMod2]] %[[#UMod3]] %[[#UMod4]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#Construct2]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#Construct3:]] = OpCompositeConstruct %[[#v4i32]] %[[#UMod1]] %[[#UMod2]] %[[#UMod3]] %[[#UMod4]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#Construct3]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#Construct4:]] = OpCompositeConstruct %[[#v4i32]] %[[#UMod1]] %[[#UMod2]] %[[#UMod3]] %[[#UMod4]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#Construct4]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#Construct5:]] = OpCompositeConstruct %[[#v4i32]] %[[#UMod5]] %[[#UMod6]] %[[#UNDEF]] %[[#UNDEF]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#Construct5]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#Construct6:]] = OpCompositeConstruct %[[#v4i32]] %[[#UMod5]] %[[#UMod6]] %[[#UNDEF]] %[[#UNDEF]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#Construct6]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + + %13 = getelementptr [4 x [6 x i32] ], ptr addrspace(10) @i2, i32 0, i32 0 + store <6 x i32> %12, ptr addrspace(10) %13, align 4 + ret void +} + +; Test remaining float vector arithmetic operations +define void @test_float_vector_arithmetic_continued() local_unnamed_addr #0 { +; CHECK: OpFunction +entry: + %2 = getelementptr [4 x [6 x float] ], ptr addrspace(10) @f1, i32 0, i32 0 + %3 = load <6 x float>, ptr addrspace(10) %2, align 4 + %4 = getelementptr [4 x [6 x float] ], ptr addrspace(10) @f1, i32 0, i32 1 + %5 = load <6 x float>, ptr addrspace(10) %4, align 4 + + ; CHECK: %[[#FDiv1:]] = OpFDiv %[[#v4f32]] + ; CHECK: %[[#FDiv2:]] = OpFDiv %[[#v4f32]] + %6 = fdiv reassoc nnan ninf nsz arcp afn <6 x float> %3, splat (float 2.000000e+00) + + ; CHECK: OpFRem %[[#float]] + ; CHECK: OpFRem %[[#float]] + ; CHECK: OpFRem %[[#float]] + ; CHECK: OpFRem %[[#float]] + ; 
CHECK: OpFRem %[[#float]] + ; CHECK: OpFRem %[[#float]] + %7 = frem reassoc nnan ninf nsz arcp afn <6 x float> %6, splat (float 3.000000e+00) + + ; CHECK: %[[#Fma1:]] = OpExtInst %[[#v4f32]] {{.*}} Fma + ; CHECK: %[[#Fma2:]] = OpExtInst %[[#v4f32]] {{.*}} Fma + %8 = call reassoc nnan ninf nsz arcp afn <6 x float> @llvm.fma.v6f32(<6 x float> %5, <6 x float> %6, <6 x float> %7) + + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma2]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma2]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + + %9 = getelementptr [4 x [6 x float] ], ptr addrspace(10) @f2, i32 0, i32 0 + store <6 x float> %8, ptr addrspace(10) %9, align 4 + ret void +} + +; Test constrained fma vector arithmetic operations +define void @test_constrained_fma_vector() local_unnamed_addr #0 { +; CHECK: OpFunction +entry: + %2 = getelementptr [4 x [6 x float] ], ptr addrspace(10) @f1, i32 0, i32 0 + %3 = load <6 x float>, ptr addrspace(10) %2, align 4 + %4 = getelementptr [4 x [6 x float] ], ptr addrspace(10) @f1, i32 0, i32 1 + %5 = load <6 x float>, ptr addrspace(10) %4, align 4 + %6 = getelementptr [4 x [6 x float] ], ptr addrspace(10) @f1, i32 0, i32 2 + %7 = load <6 x float>, ptr addrspace(10) %6, align 4 + + ; CHECK: %[[#Fma1:]] = OpExtInst %[[#v4f32]] {{.*}} Fma + ; CHECK: %[[#Fma2:]] = OpExtInst %[[#v4f32]] {{.*}} Fma + %8 = call <6 x float> @llvm.experimental.constrained.fma.v6f32(<6 x float> %3, <6 x float> %5, <6 x float> %7, metadata !"round.dynamic", metadata !"fpexcept.strict") + + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma2]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma2]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + + %9 = getelementptr [4 x [6 x float] ], ptr addrspace(10) @f2, i32 0, i32 0 + store <6 x float> %8, ptr addrspace(10) %9, align 4 + ret void +} + +declare <6 x float> @llvm.experimental.constrained.fma.v6f32(<6 x float>, <6 x float>, <6 x float>, metadata, metadata) + +attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" } \ No newline at end of file diff --git a/llvm/test/CodeGen/SPIRV/legalization/vector-arithmetic.ll b/llvm/test/CodeGen/SPIRV/legalization/vector-arithmetic.ll new file mode 100644 index 0000000000000..dd1e3d60a52bf --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/legalization/vector-arithmetic.ll @@ -0,0 +1,299 @@ +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv-unknown-vulkan %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv-unknown-vulkan %s -o - -filetype=obj | spirv-val 
%} + +; CHECK-DAG: OpName %[[#main:]] "main" +; CHECK-DAG: %[[#float:]] = OpTypeFloat 32 +; CHECK-DAG: %[[#v4f32:]] = OpTypeVector %[[#float]] 4 +; CHECK-DAG: %[[#int:]] = OpTypeInt 32 0 +; CHECK-DAG: %[[#c16:]] = OpConstant %[[#int]] 16 +; CHECK-DAG: %[[#v16f32:]] = OpTypeArray %[[#float]] %[[#c16]] +; CHECK-DAG: %[[#v16i32:]] = OpTypeArray %[[#int]] %[[#c16]] +; CHECK-DAG: %[[#ptr_ssbo_v16i32:]] = OpTypePointer Private %[[#v16i32]] +; CHECK-DAG: %[[#v4i32:]] = OpTypeVector %[[#int]] 4 + +@f1 = internal addrspace(10) global [4 x [16 x float] ] zeroinitializer +@f2 = internal addrspace(10) global [4 x [16 x float] ] zeroinitializer +@i1 = internal addrspace(10) global [4 x [16 x i32] ] zeroinitializer +@i2 = internal addrspace(10) global [4 x [16 x i32] ] zeroinitializer + +define void @main() local_unnamed_addr #0 { +; CHECK: %[[#main]] = OpFunction +entry: + %2 = getelementptr [4 x [16 x float] ], ptr addrspace(10) @f1, i32 0, i32 0 + %3 = load <16 x float>, ptr addrspace(10) %2, align 4 + %4 = getelementptr [4 x [16 x float] ], ptr addrspace(10) @f1, i32 0, i32 1 + %5 = load <16 x float>, ptr addrspace(10) %4, align 4 + %6 = getelementptr [4 x [16 x float] ], ptr addrspace(10) @f1, i32 0, i32 2 + %7 = load <16 x float>, ptr addrspace(10) %6, align 4 + %8 = getelementptr [4 x [16 x float] ], ptr addrspace(10) @f1, i32 0, i32 3 + %9 = load <16 x float>, ptr addrspace(10) %8, align 4 + + ; We expect the large vectors to be split into size 4, and the operations performed on them. + ; CHECK: %[[#Mul1:]] = OpFMul %[[#v4f32]] + ; CHECK: %[[#Mul2:]] = OpFMul %[[#v4f32]] + ; CHECK: %[[#Mul3:]] = OpFMul %[[#v4f32]] + ; CHECK: %[[#Mul4:]] = OpFMul %[[#v4f32]] + %10 = fmul reassoc nnan ninf nsz arcp afn <16 x float> %3, splat (float 3.000000e+00) + + ; CHECK: %[[#Add1:]] = OpFAdd %[[#v4f32]] %[[#Mul1]] + ; CHECK: %[[#Add2:]] = OpFAdd %[[#v4f32]] %[[#Mul2]] + ; CHECK: %[[#Add3:]] = OpFAdd %[[#v4f32]] %[[#Mul3]] + ; CHECK: %[[#Add4:]] = OpFAdd %[[#v4f32]] %[[#Mul4]] + %11 = fadd reassoc nnan ninf nsz arcp afn <16 x float> %10, %5 + + ; CHECK: %[[#Sub1:]] = OpFSub %[[#v4f32]] %[[#Add1]] + ; CHECK: %[[#Sub2:]] = OpFSub %[[#v4f32]] %[[#Add2]] + ; CHECK: %[[#Sub3:]] = OpFSub %[[#v4f32]] %[[#Add3]] + ; CHECK: %[[#Sub4:]] = OpFSub %[[#v4f32]] %[[#Add4]] + %13 = fsub reassoc nnan ninf nsz arcp afn <16 x float> %11, %9 + + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub1]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub1]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub1]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub1]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub2]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub2]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub2]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub2]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub3]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub3]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub3]] 2 + ; CHECK: OpStore 
{{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub3]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub4]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub4]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub4]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Sub4]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + + %14 = getelementptr [4 x [16 x float] ], ptr addrspace(10) @f2, i32 0, i32 0 + store <16 x float> %13, ptr addrspace(10) %14, align 4 + ret void +} + +; Test integer vector arithmetic operations +define void @test_int_vector_arithmetic() local_unnamed_addr #0 { +; CHECK: OpFunction +entry: + %2 = getelementptr [4 x [16 x i32] ], ptr addrspace(10) @i1, i32 0, i32 0 + %3 = load <16 x i32>, ptr addrspace(10) %2, align 4 + %4 = getelementptr [4 x [16 x i32] ], ptr addrspace(10) @i1, i32 0, i32 1 + %5 = load <16 x i32>, ptr addrspace(10) %4, align 4 + + ; CHECK: %[[#Add1:]] = OpIAdd %[[#v4i32]] + ; CHECK: %[[#Add2:]] = OpIAdd %[[#v4i32]] + ; CHECK: %[[#Add3:]] = OpIAdd %[[#v4i32]] + ; CHECK: %[[#Add4:]] = OpIAdd %[[#v4i32]] + %6 = add <16 x i32> %3, %5 + + ; CHECK: %[[#Sub1:]] = OpISub %[[#v4i32]] %[[#Add1]] + ; CHECK: %[[#Sub2:]] = OpISub %[[#v4i32]] %[[#Add2]] + ; CHECK: %[[#Sub3:]] = OpISub %[[#v4i32]] %[[#Add3]] + ; CHECK: %[[#Sub4:]] = OpISub %[[#v4i32]] %[[#Add4]] + %7 = sub <16 x i32> %6, %5 + + ; CHECK: %[[#Mul1:]] = OpIMul %[[#v4i32]] %[[#Sub1]] + ; CHECK: %[[#Mul2:]] = OpIMul %[[#v4i32]] %[[#Sub2]] + ; CHECK: %[[#Mul3:]] = OpIMul %[[#v4i32]] %[[#Sub3]] + ; CHECK: %[[#Mul4:]] = OpIMul %[[#v4i32]] %[[#Sub4]] + %8 = mul <16 x i32> %7, splat (i32 2) + + ; CHECK: %[[#SDiv1:]] = OpSDiv %[[#v4i32]] %[[#Mul1]] + ; CHECK: %[[#SDiv2:]] = OpSDiv %[[#v4i32]] %[[#Mul2]] + ; CHECK: %[[#SDiv3:]] = OpSDiv %[[#v4i32]] %[[#Mul3]] + ; CHECK: %[[#SDiv4:]] = OpSDiv %[[#v4i32]] %[[#Mul4]] + %9 = sdiv <16 x i32> %8, splat (i32 2) + + ; CHECK: %[[#UDiv1:]] = OpUDiv %[[#v4i32]] %[[#SDiv1]] + ; CHECK: %[[#UDiv2:]] = OpUDiv %[[#v4i32]] %[[#SDiv2]] + ; CHECK: %[[#UDiv3:]] = OpUDiv %[[#v4i32]] %[[#SDiv3]] + ; CHECK: %[[#UDiv4:]] = OpUDiv %[[#v4i32]] %[[#SDiv4]] + %10 = udiv <16 x i32> %9, splat (i32 1) + + ; CHECK: %[[#SRem1:]] = OpSRem %[[#v4i32]] %[[#UDiv1]] + ; CHECK: %[[#SRem2:]] = OpSRem %[[#v4i32]] %[[#UDiv2]] + ; CHECK: %[[#SRem3:]] = OpSRem %[[#v4i32]] %[[#UDiv3]] + ; CHECK: %[[#SRem4:]] = OpSRem %[[#v4i32]] %[[#UDiv4]] + %11 = srem <16 x i32> %10, splat (i32 3) + + ; CHECK: %[[#UMod1:]] = OpUMod %[[#v4i32]] %[[#SRem1]] + ; CHECK: %[[#UMod2:]] = OpUMod %[[#v4i32]] %[[#SRem2]] + ; CHECK: %[[#UMod3:]] = OpUMod %[[#v4i32]] %[[#SRem3]] + ; CHECK: %[[#UMod4:]] = OpUMod %[[#v4i32]] %[[#SRem4]] + %12 = urem <16 x i32> %11, splat (i32 3) + + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod1]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod1]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod1]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod1]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod2]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] 
%[[#UMod2]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod2]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod2]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod3]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod3]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod3]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod3]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod4]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod4]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod4]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#int]] %[[#UMod4]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + + %13 = getelementptr [4 x [16 x i32] ], ptr addrspace(10) @i2, i32 0, i32 0 + store <16 x i32> %12, ptr addrspace(10) %13, align 4 + ret void +} + +; Test remaining float vector arithmetic operations +define void @test_float_vector_arithmetic_continued() local_unnamed_addr #0 { +; CHECK: OpFunction +entry: + %2 = getelementptr [4 x [16 x float] ], ptr addrspace(10) @f1, i32 0, i32 0 + %3 = load <16 x float>, ptr addrspace(10) %2, align 4 + %4 = getelementptr [4 x [16 x float] ], ptr addrspace(10) @f1, i32 0, i32 1 + %5 = load <16 x float>, ptr addrspace(10) %4, align 4 + + ; CHECK: %[[#FDiv1:]] = OpFDiv %[[#v4f32]] + ; CHECK: %[[#FDiv2:]] = OpFDiv %[[#v4f32]] + ; CHECK: %[[#FDiv3:]] = OpFDiv %[[#v4f32]] + ; CHECK: %[[#FDiv4:]] = OpFDiv %[[#v4f32]] + %6 = fdiv reassoc nnan ninf nsz arcp afn <16 x float> %3, splat (float 2.000000e+00) + + ; CHECK: %[[#FRem1:]] = OpFRem %[[#v4f32]] %[[#FDiv1]] + ; CHECK: %[[#FRem2:]] = OpFRem %[[#v4f32]] %[[#FDiv2]] + ; CHECK: %[[#FRem3:]] = OpFRem %[[#v4f32]] %[[#FDiv3]] + ; CHECK: %[[#FRem4:]] = OpFRem %[[#v4f32]] %[[#FDiv4]] + %7 = frem reassoc nnan ninf nsz arcp afn <16 x float> %6, splat (float 3.000000e+00) + + ; CHECK: %[[#Fma1:]] = OpExtInst %[[#v4f32]] {{.*}} Fma {{.*}} %[[#FDiv1]] %[[#FRem1]] + ; CHECK: %[[#Fma2:]] = OpExtInst %[[#v4f32]] {{.*}} Fma {{.*}} %[[#FDiv2]] %[[#FRem2]] + ; CHECK: %[[#Fma3:]] = OpExtInst %[[#v4f32]] {{.*}} Fma {{.*}} %[[#FDiv3]] %[[#FRem3]] + ; CHECK: %[[#Fma4:]] = OpExtInst %[[#v4f32]] {{.*}} Fma {{.*}} %[[#FDiv4]] %[[#FRem4]] + %8 = call reassoc nnan ninf nsz arcp afn <16 x float> @llvm.fma.v16f32(<16 x float> %5, <16 x float> %6, <16 x float> %7) + + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma2]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma2]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma2]] 
2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma2]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma3]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma3]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma3]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma3]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma4]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma4]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma4]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma4]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + + %9 = getelementptr [4 x [16 x float] ], ptr addrspace(10) @f2, i32 0, i32 0 + store <16 x float> %8, ptr addrspace(10) %9, align 4 + ret void +} + +; Test constrained fma vector arithmetic operations +define void @test_constrained_fma_vector() local_unnamed_addr #0 { +; CHECK: OpFunction +entry: + %2 = getelementptr [4 x [16 x float] ], ptr addrspace(10) @f1, i32 0, i32 0 + %3 = load <16 x float>, ptr addrspace(10) %2, align 4 + %4 = getelementptr [4 x [16 x float] ], ptr addrspace(10) @f1, i32 0, i32 1 + %5 = load <16 x float>, ptr addrspace(10) %4, align 4 + %6 = getelementptr [4 x [16 x float] ], ptr addrspace(10) @f1, i32 0, i32 2 + %7 = load <16 x float>, ptr addrspace(10) %6, align 4 + + ; CHECK: %[[#Fma1:]] = OpExtInst %[[#v4f32]] {{.*}} Fma + ; CHECK: %[[#Fma2:]] = OpExtInst %[[#v4f32]] {{.*}} Fma + ; CHECK: %[[#Fma3:]] = OpExtInst %[[#v4f32]] {{.*}} Fma + ; CHECK: %[[#Fma4:]] = OpExtInst %[[#v4f32]] {{.*}} Fma + %8 = call <16 x float> @llvm.experimental.constrained.fma.v16f32(<16 x float> %3, <16 x float> %5, <16 x float> %7, metadata !"round.dynamic", metadata !"fpexcept.strict") + + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma1]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma2]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma2]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma2]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma2]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma3]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma3]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma3]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma3]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract 
%[[#float]] %[[#Fma4]] 0 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma4]] 1 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma4]] 2 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + ; CHECK: %[[#EXTRACT:]] = OpCompositeExtract %[[#float]] %[[#Fma4]] 3 + ; CHECK: OpStore {{.*}} %[[#EXTRACT]] + + %9 = getelementptr [4 x [16 x float] ], ptr addrspace(10) @f2, i32 0, i32 0 + store <16 x float> %8, ptr addrspace(10) %9, align 4 + ret void +} + +declare <16 x float> @llvm.experimental.constrained.fma.v16f32(<16 x float>, <16 x float>, <16 x float>, metadata, metadata) + +attributes #0 = { "hlsl.numthreads"="1,1,1" "hlsl.shader"="compute" }
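
Note on the per-element alignment change in SPIRVLegalizePointerCast.cpp: when a vector store is scalarized into one store per array element, the old code reused the vector's alignment for every element, which over-promises alignment at most offsets. The new commonAlignment(Alignment, i * ElemSize) call computes the largest alignment still guaranteed at byte offset i * ElemSize. Below is a minimal standalone sketch of that computation, not part of the patch: it assumes linking against LLVMSupport, and hard-codes the base alignment of 64 and the 4-byte i32 elements used in load-store-global.ll above.

#include "llvm/Support/Alignment.h"
#include "llvm/Support/raw_ostream.h"

using namespace llvm;

int main() {
  const Align Base(64);        // alignment of the whole 16 x i32 store
  const uint64_t ElemSize = 4; // allocation size of one i32 element
  for (unsigned I = 0; I < 16; ++I) {
    // Element I lives at byte offset I * ElemSize. commonAlignment returns
    // the largest power of two dividing both the base alignment and that
    // offset, i.e. the strongest guarantee left for the element's address.
    Align NewAlign = commonAlignment(Base, I * ElemSize);
    outs() << "element " << I << ": Aligned " << NewAlign.value() << "\n";
  }
  return 0;
}

Printed for elements 0 through 15, this yields 64, 4, 8, 4, 16, 4, 8, 4, 32, 4, 8, 4, 16, 4, 8, 4 -- the same sequence the per-element OpStore ... Aligned N lines in load-store-global.ll check for.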