diff --git a/llvm/docs/SPIRVUsage.rst b/llvm/docs/SPIRVUsage.rst index 6ff8034cac00c..4d8b7996dec01 100644 --- a/llvm/docs/SPIRVUsage.rst +++ b/llvm/docs/SPIRVUsage.rst @@ -153,6 +153,8 @@ list of supported SPIR-V extensions, sorted alphabetically by their extension na - Adds atomic add instruction on floating-point numbers. * - ``SPV_EXT_shader_atomic_float_min_max`` - Adds atomic min and max instruction on floating-point numbers. + * - ``SPV_INTEL_2d_block_io`` + - Adds additional subgroup block prefetch, load, load transposed, load transformed and store instructions to read two-dimensional blocks of data from a two-dimensional region of memory, or to write two-dimensional blocks of data to a two dimensional region of memory. * - ``SPV_INTEL_arbitrary_precision_integers`` - Allows generating arbitrary width integer types. * - ``SPV_INTEL_bindless_images`` diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp index c272f41193efa..29e4046146329 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.cpp @@ -1076,6 +1076,24 @@ static bool buildTernaryBitwiseFunctionINTELInst( return true; } +/// Helper function for building Intel's 2d block io instructions. +static bool build2DBlockIOINTELInst(const SPIRV::IncomingCall *Call, + unsigned Opcode, + MachineIRBuilder &MIRBuilder, + SPIRVGlobalRegistry *GR) { + // Generate SPIRV instruction accordingly. + if (Call->isSpirvOp()) + return buildOpFromWrapper(MIRBuilder, Opcode, Call, Register(0)); + + auto MIB = MIRBuilder.buildInstr(Opcode) + .addDef(Call->ReturnRegister) + .addUse(GR->getSPIRVTypeID(Call->ReturnType)); + for (unsigned i = 0; i < Call->Arguments.size(); ++i) + MIB.addUse(Call->Arguments[i]); + + return true; +} + static unsigned getNumComponentsForDim(SPIRV::Dim::Dim dim) { switch (dim) { case SPIRV::Dim::DIM_1D: @@ -2319,6 +2337,17 @@ generateTernaryBitwiseFunctionINTELInst(const SPIRV::IncomingCall *Call, return buildTernaryBitwiseFunctionINTELInst(Call, Opcode, MIRBuilder, GR); } +static bool generate2DBlockIOINTELInst(const SPIRV::IncomingCall *Call, + MachineIRBuilder &MIRBuilder, + SPIRVGlobalRegistry *GR) { + // Lookup the instruction opcode in the TableGen records. + const SPIRV::DemangledBuiltin *Builtin = Call->Builtin; + unsigned Opcode = + SPIRV::lookupNativeBuiltin(Builtin->Name, Builtin->Set)->Opcode; + + return build2DBlockIOINTELInst(Call, Opcode, MIRBuilder, GR); +} + static bool buildNDRange(const SPIRV::IncomingCall *Call, MachineIRBuilder &MIRBuilder, SPIRVGlobalRegistry *GR) { @@ -2902,6 +2931,8 @@ std::optional lowerBuiltin(const StringRef DemangledCall, return generateBindlessImageINTELInst(Call.get(), MIRBuilder, GR); case SPIRV::TernaryBitwiseINTEL: return generateTernaryBitwiseFunctionINTELInst(Call.get(), MIRBuilder, GR); + case SPIRV::Block2DLoadStore: + return generate2DBlockIOINTELInst(Call.get(), MIRBuilder, GR); } return false; } diff --git a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td index 59cd38126cc01..6842e5ff067cf 100644 --- a/llvm/lib/Target/SPIRV/SPIRVBuiltins.td +++ b/llvm/lib/Target/SPIRV/SPIRVBuiltins.td @@ -68,6 +68,7 @@ def ICarryBorrow : BuiltinGroup; def ExtendedBitOps : BuiltinGroup; def BindlessINTEL : BuiltinGroup; def TernaryBitwiseINTEL : BuiltinGroup; +def Block2DLoadStore : BuiltinGroup; //===----------------------------------------------------------------------===// // Class defining a demangled builtin record. The information in the record @@ -718,6 +719,13 @@ defm : DemangledNativeBuiltin<"__spirv_ConvertHandleToSampledImageINTEL", OpenCL // SPV_INTEL_ternary_bitwise_function builtin records: defm : DemangledNativeBuiltin<"__spirv_BitwiseFunctionINTEL", OpenCL_std, TernaryBitwiseINTEL, 4, 4, OpBitwiseFunctionINTEL>; +// SPV_INTEL_2d_block_io builtin records +defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockLoadINTEL", OpenCL_std, Block2DLoadStore, 10, 10, OpSubgroup2DBlockLoadINTEL>; +defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockLoadTransposeINTEL", OpenCL_std, Block2DLoadStore, 10, 10, OpSubgroup2DBlockLoadTransposeINTEL>; +defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockLoadTransformINTEL", OpenCL_std, Block2DLoadStore, 10, 10, OpSubgroup2DBlockLoadTransformINTEL>; +defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockPrefetchINTEL", OpenCL_std, Block2DLoadStore, 9, 9, OpSubgroup2DBlockPrefetchINTEL>; +defm : DemangledNativeBuiltin<"__spirv_Subgroup2DBlockStoreINTEL", OpenCL_std, Block2DLoadStore, 10, 10, OpSubgroup2DBlockStoreINTEL>; + //===----------------------------------------------------------------------===// // Class defining a work/sub group builtin that should be translated into a // SPIR-V instruction using the defined properties. diff --git a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp index 55a468f215c46..27c37339d6703 100644 --- a/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVCommandLine.cpp @@ -97,7 +97,9 @@ static const std::map> SPIRV::Extension::Extension:: SPV_INTEL_subgroup_matrix_multiply_accumulate}, {"SPV_INTEL_ternary_bitwise_function", - SPIRV::Extension::Extension::SPV_INTEL_ternary_bitwise_function}}; + SPIRV::Extension::Extension::SPV_INTEL_ternary_bitwise_function}, + {"SPV_INTEL_2d_block_io", + SPIRV::Extension::Extension::SPV_INTEL_2d_block_io}}; bool SPIRVExtensionsParser::parse(cl::Option &O, StringRef ArgName, StringRef ArgValue, diff --git a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td index 6d8c84945d7d4..338f6809a3e46 100644 --- a/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td +++ b/llvm/lib/Target/SPIRV/SPIRVInstrInfo.td @@ -936,3 +936,20 @@ def OpAliasScopeListDeclINTEL: Op<5913, (outs ID:$res), (ins variable_ops), // SPV_INTEL_ternary_bitwise_function def OpBitwiseFunctionINTEL: Op<6242, (outs ID:$res), (ins TYPE:$type, ID:$a, ID:$b, ID:$c, ID:$lut_index), "$res = OpBitwiseFunctionINTEL $type $a $b $c $lut_index">; + +// SPV_INTEL_2d_block_io +def OpSubgroup2DBlockLoadINTEL: Op<6231, (outs), (ins ID:$element_size, ID:$block_width, ID:$block_height, + ID:$block_count, ID:$src_base_ptr, ID:$memory_width, ID:$memory_height, ID:$memory_pitch, ID:$coord, ID:$dst_ptr), + "OpSubgroup2DBlockLoadINTEL $element_size $block_width $block_height $block_count $src_base_ptr $memory_width $memory_height $memory_pitch $coord $dst_ptr">; +def OpSubgroup2DBlockLoadTransposeINTEL: Op<6233, (outs), (ins ID:$element_size, ID:$block_width, ID:$block_height, + ID:$block_count, ID:$src_base_ptr, ID:$memory_width, ID:$memory_height, ID:$memory_pitch, ID:$coord, ID:$dst_ptr), + "OpSubgroup2DBlockLoadTransposeINTEL $element_size $block_width $block_height $block_count $src_base_ptr $memory_width $memory_height $memory_pitch $coord $dst_ptr">; +def OpSubgroup2DBlockLoadTransformINTEL: Op<6232, (outs), (ins ID:$element_size, ID:$block_width, ID:$block_height, + ID:$block_count, ID:$src_base_ptr, ID:$memory_width, ID:$memory_height, ID:$memory_pitch, ID:$coord, ID:$dst_ptr), + "OpSubgroup2DBlockLoadTransformINTEL $element_size $block_width $block_height $block_count $src_base_ptr $memory_width $memory_height $memory_pitch $coord $dst_ptr">; +def OpSubgroup2DBlockPrefetchINTEL: Op<6234, (outs), (ins ID:$element_size, ID:$block_width, ID:$block_height, + ID:$block_count, ID:$src_base_ptr, ID:$memory_width, ID:$memory_height, ID:$memory_pitch, ID:$coord), + "OpSubgroup2DBlockPrefetchINTEL $element_size $block_width $block_height $block_count $src_base_ptr $memory_width $memory_height $memory_pitch $coord">; +def OpSubgroup2DBlockStoreINTEL: Op<6235, (outs), (ins ID:$element_size, ID:$block_width, ID:$block_height, + ID:$block_count, ID:$src_ptr, ID:$dst_base_ptr, ID:$memory_width, ID:$memory_height, ID:$memory_pitch, ID:$coord), + "OpSubgroup2DBlockStoreINTEL $element_size $block_width $block_height $block_count $src_ptr $dst_base_ptr $memory_width $memory_height $memory_pitch $coord">; diff --git a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp index 6d2ecd563d200..a4f95ea11d073 100644 --- a/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp +++ b/llvm/lib/Target/SPIRV/SPIRVModuleAnalysis.cpp @@ -1739,6 +1739,30 @@ void addInstrRequirements(const MachineInstr &MI, Reqs.addExtension(SPIRV::Extension::SPV_INTEL_bindless_images); Reqs.addCapability(SPIRV::Capability::BindlessImagesINTEL); break; + case SPIRV::OpSubgroup2DBlockLoadINTEL: + case SPIRV::OpSubgroup2DBlockLoadTransposeINTEL: + case SPIRV::OpSubgroup2DBlockLoadTransformINTEL: + case SPIRV::OpSubgroup2DBlockPrefetchINTEL: + case SPIRV::OpSubgroup2DBlockStoreINTEL: { + if (!ST.canUseExtension(SPIRV::Extension::SPV_INTEL_2d_block_io)) + report_fatal_error("OpSubgroup2DBlock[Load/LoadTranspose/LoadTransform/" + "Prefetch/Store]INTEL instructions require the " + "following SPIR-V extension: SPV_INTEL_2d_block_io", + false); + Reqs.addExtension(SPIRV::Extension::SPV_INTEL_2d_block_io); + Reqs.addCapability(SPIRV::Capability::Subgroup2DBlockIOINTEL); + + const auto OpCode = MI.getOpcode(); + if (OpCode == SPIRV::OpSubgroup2DBlockLoadTransposeINTEL) { + Reqs.addCapability(SPIRV::Capability::Subgroup2DBlockTransposeINTEL); + break; + } + if (OpCode == SPIRV::OpSubgroup2DBlockLoadTransformINTEL) { + Reqs.addCapability(SPIRV::Capability::Subgroup2DBlockTransformINTEL); + break; + } + break; + } case SPIRV::OpKill: { Reqs.addCapability(SPIRV::Capability::Shader); } break; diff --git a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td index cc32200a0a261..851495bda4979 100644 --- a/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td +++ b/llvm/lib/Target/SPIRV/SPIRVSymbolicOperands.td @@ -315,6 +315,7 @@ defm SPV_INTEL_memory_access_aliasing : ExtensionOperand<118>; defm SPV_INTEL_fp_max_error : ExtensionOperand<119>; defm SPV_INTEL_ternary_bitwise_function : ExtensionOperand<120>; defm SPV_INTEL_subgroup_matrix_multiply_accumulate : ExtensionOperand<121>; +defm SPV_INTEL_2d_block_io : ExtensionOperand<122>; //===----------------------------------------------------------------------===// // Multiclass used to define Capabilities enum values and at the same time @@ -517,6 +518,9 @@ defm MemoryAccessAliasingINTEL : CapabilityOperand<5910, 0, 0, [SPV_INTEL_memory defm FPMaxErrorINTEL : CapabilityOperand<6169, 0, 0, [SPV_INTEL_fp_max_error], []>; defm TernaryBitwiseFunctionINTEL : CapabilityOperand<6241, 0, 0, [SPV_INTEL_ternary_bitwise_function], []>; defm SubgroupMatrixMultiplyAccumulateINTEL : CapabilityOperand<6236, 0, 0, [SPV_INTEL_subgroup_matrix_multiply_accumulate], []>; +defm Subgroup2DBlockIOINTEL : CapabilityOperand<6228, 0, 0, [SPV_INTEL_2d_block_io], []>; +defm Subgroup2DBlockTransformINTEL : CapabilityOperand<6229, 0, 0, [SPV_INTEL_2d_block_io], [Subgroup2DBlockIOINTEL]>; +defm Subgroup2DBlockTransposeINTEL : CapabilityOperand<6230, 0, 0, [SPV_INTEL_2d_block_io], [Subgroup2DBlockIOINTEL]>; //===----------------------------------------------------------------------===// // Multiclass used to define SourceLanguage enum values and at the same time diff --git a/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_2d_block_io/2d_block_io_generic.ll b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_2d_block_io/2d_block_io_generic.ll new file mode 100644 index 0000000000000..f04ec4d3e1648 --- /dev/null +++ b/llvm/test/CodeGen/SPIRV/extensions/SPV_INTEL_2d_block_io/2d_block_io_generic.ll @@ -0,0 +1,48 @@ +; RUN: not llc -O0 -mtriple=spirv64-unknown-unknown %s -o %t.spvt 2>&1 | FileCheck %s --check-prefix=CHECK-ERROR +; RUN: llc -O0 -verify-machineinstrs -mtriple=spirv64-unknown-unknown --spirv-ext=+SPV_INTEL_2d_block_io %s -o - | FileCheck %s +; RUN: %if spirv-tools %{ llc -O0 -mtriple=spirv64-unknown-unknown %s --spirv-ext=+SPV_INTEL_2d_block_io -o - -filetype=obj | spirv-val %} + +; CHECK-ERROR: LLVM ERROR: OpSubgroup2DBlock[Load/LoadTranspose/LoadTransform/Prefetch/Store]INTEL +; CHECK-ERROR-SAME: instructions require the following SPIR-V extension: SPV_INTEL_2d_block_io + +; CHECK: OpCapability Subgroup2DBlockIOINTEL +; CHECK: OpCapability Subgroup2DBlockTransformINTEL +; CHECK: OpCapability Subgroup2DBlockTransposeINTEL +; CHECK: OpExtension "SPV_INTEL_2d_block_io" + +; CHECK-DAG: %[[Int8Ty:[0-9]+]] = OpTypeInt 8 0 +; CHECK-DAG: %[[Int32Ty:[0-9]+]] = OpTypeInt 32 0 +; CHECK-DAG: %[[Const42:[0-9]+]] = OpConstant %[[Int32Ty]] 42 +; CHECK-DAG: %[[VoidTy:[0-9]+]] = OpTypeVoid +; CHECK-DAG: %[[GlbPtrTy:[0-9]+]] = OpTypePointer CrossWorkgroup %[[Int8Ty]] +; CHECK-DAG: %[[VectorTy:[0-9]+]] = OpTypeVector %[[Int32Ty]] 2 +; CHECK-DAG: %[[PrvPtrTy:[0-9]+]] = OpTypePointer Function %[[Int8Ty]] +; CHECK: %[[BaseSrc:[0-9]+]] = OpFunctionParameter %[[GlbPtrTy]] +; CHECK: %[[BaseDst:[0-9]+]] = OpFunctionParameter %[[GlbPtrTy]] +; CHECK: %[[Width:[0-9]+]] = OpFunctionParameter %[[Int32Ty]] +; CHECK: %[[Height:[0-9]+]] = OpFunctionParameter %[[Int32Ty]] +; CHECK: %[[Pitch:[0-9]+]] = OpFunctionParameter %[[Int32Ty]] +; CHECK: %[[Coord:[0-9]+]] = OpFunctionParameter %[[VectorTy]] +; CHECK: %[[Dst:[0-9]+]] = OpFunctionParameter %[[PrvPtrTy]] +; CHECK: %[[Src:[0-9]+]] = OpFunctionParameter %[[PrvPtrTy]] +; CHECK: OpSubgroup2DBlockLoadINTEL %[[Const42]] %[[Const42]] %[[Const42]] %[[Const42]] %[[BaseSrc]] %[[Width]] %[[Height]] %[[Pitch]] %[[Coord]] %[[Dst]] +; CHECK: OpSubgroup2DBlockLoadTransformINTEL %[[Const42]] %[[Const42]] %[[Const42]] %[[Const42]] %[[BaseSrc]] %[[Width]] %[[Height]] %[[Pitch]] %[[Coord]] %[[Dst]] +; CHECK: OpSubgroup2DBlockLoadTransposeINTEL %[[Const42]] %[[Const42]] %[[Const42]] %[[Const42]] %[[BaseSrc]] %[[Width]] %[[Height]] %[[Pitch]] %[[Coord]] %[[Dst]] +; CHECK: OpSubgroup2DBlockPrefetchINTEL %[[Const42]] %[[Const42]] %[[Const42]] %[[Const42]] %[[BaseSrc]] %[[Width]] %[[Height]] %[[Pitch]] %[[Coord]] +; CHECK: OpSubgroup2DBlockStoreINTEL %[[Const42]] %[[Const42]] %[[Const42]] %[[Const42]] %[[Src]] %[[BaseDst]] %[[Width]] %[[Height]] %[[Pitch]] %[[Coord]] + +define spir_func void @foo(ptr addrspace(1) %base_address, ptr addrspace(1) %dst_base_pointer, i32 %width, i32 %height, i32 %pitch, <2 x i32> %coord, ptr %dst_pointer, ptr %src_pointer) { +entry: + call spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPU3AS1KviiiDv2_iPv(i32 42, i32 42, i32 42, i32 42, ptr addrspace(1) %base_address, i32 %width, i32 %height, i32 %pitch, <2 x i32> %coord, ptr %dst_pointer) + call spir_func void @_Z41__spirv_Subgroup2DBlockLoadTransformINTELiiiiPU3AS1KviiiDv2_iPv(i32 42, i32 42, i32 42, i32 42, ptr addrspace(1) %base_address, i32 %width, i32 %height, i32 %pitch, <2 x i32> %coord, ptr %dst_pointer) + call spir_func void @_Z41__spirv_Subgroup2DBlockLoadTransposeINTELiiiiPU3AS1KviiiDv2_iPv(i32 42, i32 42, i32 42, i32 42, ptr addrspace(1) %base_address, i32 %width, i32 %height, i32 %pitch, <2 x i32> %coord, ptr %dst_pointer) + call spir_func void @_Z36__spirv_Subgroup2DBlockPrefetchINTELiiiiPU3AS1KviiiDv2_i(i32 42, i32 42, i32 42, i32 42, ptr addrspace(1) %base_address, i32 %width, i32 %height, i32 %pitch, <2 x i32> %coord) + call spir_func void @_Z33__spirv_Subgroup2DBlockStoreINTELiiiiPKvPU3AS1viiiDv2_i(i32 42, i32 42, i32 42, i32 42, ptr %src_pointer, ptr addrspace(1) %dst_base_pointer, i32 %width, i32 %height, i32 %pitch, <2 x i32> %coord) + ret void +} + +declare spir_func void @_Z32__spirv_Subgroup2DBlockLoadINTELiiiiPU3AS1KviiiDv2_iPv(i32, i32, i32, i32, ptr addrspace(1), i32, i32, i32, <2 x i32>, ptr) +declare spir_func void @_Z41__spirv_Subgroup2DBlockLoadTransformINTELiiiiPU3AS1KviiiDv2_iPv(i32, i32, i32, i32, ptr addrspace(1), i32, i32, i32, <2 x i32>, ptr) +declare spir_func void @_Z41__spirv_Subgroup2DBlockLoadTransposeINTELiiiiPU3AS1KviiiDv2_iPv(i32, i32, i32, i32, ptr addrspace(1), i32, i32, i32, <2 x i32>, ptr) +declare spir_func void @_Z36__spirv_Subgroup2DBlockPrefetchINTELiiiiPU3AS1KviiiDv2_i(i32, i32, i32, i32, ptr addrspace(1), i32, i32, i32, <2 x i32>) +declare spir_func void @_Z33__spirv_Subgroup2DBlockStoreINTELiiiiPKvPU3AS1viiiDv2_i(i32, i32, i32, i32, ptr, ptr addrspace(1), i32, i32, i32, <2 x i32>)