diff --git a/.github/workflows/clang-format-checker.yml b/.github/workflows/clang-format-checker.yml index d1887e4519..74b734a7c0 100644 --- a/.github/workflows/clang-format-checker.yml +++ b/.github/workflows/clang-format-checker.yml @@ -12,7 +12,7 @@ jobs: permissions: pull-requests: write steps: - - name: Fetch LLVM sources + - name: Fetch DirectXShaderCompiler sources uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: ref: ${{ github.event.pull_request.head.sha }} @@ -31,6 +31,20 @@ jobs: separator: "," skip_initial_fetch: true + # We need to pull the script from the main branch, so that we ensure + # we get the latest version of this script. + - name: Fetch code formatting utils + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + repository: microsoft/DirectXShaderCompiler + ref: ${{ github.base_ref }} + sparse-checkout: | + utils/git/requirements_formatting.txt + utils/git/code-format-helper.py + utils/git/code-format-save-diff.py + sparse-checkout-cone-mode: false + path: code-format-tools + - name: "Listed files" env: LISTED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} @@ -48,10 +62,10 @@ jobs: with: python-version: '3.11' cache: 'pip' - cache-dependency-path: 'utils/git/requirements_formatting.txt' + cache-dependency-path: 'code-format-tools/utils/git/requirements_formatting.txt' - name: Install python dependencies - run: pip install -r utils/git/requirements_formatting.txt + run: pip install -r code-format-tools/utils/git/requirements_formatting.txt - name: Run code formatter id: formatter @@ -61,7 +75,7 @@ jobs: END_REV: ${{ github.event.pull_request.head.sha }} CHANGED_FILES: ${{ steps.changed-files.outputs.all_changed_files }} run: | - python utils/git/code-format-helper.py \ + python code-format-tools/utils/git/code-format-helper.py \ --token ${{ secrets.GITHUB_TOKEN }} \ --issue-number $GITHUB_PR_NUMBER \ --start-rev $START_REV \ @@ -92,28 +106,37 @@ jobs: } catch (err) { core.setFailed(`Request failed with error ${err}`) } - - name: Fetch LLVM sources - uses: actions/checkout@v4 + + # We need to pull the script from the main branch, so that we ensure + # we get the latest version of this script. 
+ - name: Fetch code formatting utils + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 with: - fetch-depth: 2 - path: build/main_src + repository: microsoft/DirectXShaderCompiler + ref: ${{ github.base_ref }} + sparse-checkout: | + utils/git/requirements_formatting.txt + utils/git/code-format-helper.py + utils/git/code-format-save-diff.py + sparse-checkout-cone-mode: false + path: code-format-tools - name: Setup Python env uses: actions/setup-python@v4 with: python-version: '3.11' cache: 'pip' - cache-dependency-path: 'build/main_src/utils/git/requirements_formatting.txt' + cache-dependency-path: 'code-format-tools/utils/git/requirements_formatting.txt' - name: Install python dependencies - run: pip install -r build/main_src/utils/git/requirements_formatting.txt + run: pip install -r code-format-tools/utils/git/requirements_formatting.txt - name: Apply code diff env: GITHUB_PR_NUMBER: ${{ github.event.issue.number }} COMMENT_ID: ${{ github.event.comment.id }} run: | - python build/main_src/utils/git/code-format-save-diff.py \ + python code-format-tools/utils/git/code-format-save-diff.py \ --token ${{ secrets.GITHUB_TOKEN }} \ --issue-number $GITHUB_PR_NUMBER \ --tmp-diff-file $TMP_DIFF_FILE \ diff --git a/autoconf/config.guess b/autoconf/config.guess index cf0541d1f1..62df94c187 100755 --- a/autoconf/config.guess +++ b/autoconf/config.guess @@ -929,6 +929,9 @@ EOF ia64:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; + loongarch64:Linux:*:*) + echo ${UNAME_MACHINE}-unknown-linux-gnu + exit ;; m32r*:Linux:*:*) echo ${UNAME_MACHINE}-unknown-linux-gnu exit ;; diff --git a/azure-pipelines.yml b/azure-pipelines.yml index 33c5349f9e..dee579287c 100644 --- a/azure-pipelines.yml +++ b/azure-pipelines.yml @@ -52,17 +52,17 @@ stages: variables: macOS: macOS-latest - linux: Ubuntu-latest + linux: Ubuntu-22.04 # FIXME: #7364, DXC does not build correctly with GCC 13+ strategy: matrix: - Linux_Clang_Release: + Linux_Clang_RelWithDebInfo: image: ${{ variables.linux }} - configuration: Release + configuration: RelWithDebInfo CC: clang-18 CXX: clang++-18 - CMAKE_OPTS: -DLLVM_ENABLE_WERROR=On -DLLVM_USE_SANITIZER='Address;Undefined' -DLLVM_ENABLE_LIBCXX=On -DLLVM_USE_LINKER=lld - CHECK_ALL_ENV: ASAN_OPTIONS=alloc_dealloc_mismatch=0 + CMAKE_OPTS: -DLLVM_ENABLE_WERROR=On -DLLVM_USE_SANITIZER='Address;Undefined' -DLLVM_ENABLE_LIBCXX=On -DLLVM_USE_LINKER=lld-18 + CHECK_ALL_ENV: ASAN_OPTIONS=alloc_dealloc_mismatch=0 LSAN_OPTIONS=suppressions=$BUILD_SOURCESDIRECTORY/utils/asan/x86_64-pc-linux-gnu.lsan.supp:print_suppressions=0 ASAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer-18 LSAN_SYMBOLIZER_PATH=/usr/bin/llvm-symbolizer-18 OS: Linux Linux_Clang_Debug: image: ${{ variables.linux }} @@ -107,6 +107,8 @@ stages: versionSpec: '3.x' - bash: | + sudo apt-get update + sudo apt-get upgrade libc6 libc6-dbg sudo apt-get install ninja-build wget https://apt.llvm.org/llvm.sh chmod u+x llvm.sh diff --git a/cmake/config-ix.cmake b/cmake/config-ix.cmake index 4541d08162..226881ad30 100644 --- a/cmake/config-ix.cmake +++ b/cmake/config-ix.cmake @@ -367,6 +367,8 @@ elseif (LLVM_NATIVE_ARCH MATCHES "wasm64") set(LLVM_NATIVE_ARCH WebAssembly) elseif (LLVM_NATIVE_ARCH MATCHES "riscv64") set(LLVM_NATIVE_ARCH RISCV) +elseif (LLVM_NATIVE_ARCH MATCHES "loongarch64") + set(LLVM_NATIVE_ARCH LoongArch) elseif (LLVM_NATIVE_ARCH MATCHES "e2k") set(LLVM_NATIVE_ARCH E2K) else () diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index acf76c2907..00bdaed363 100644 --- 
a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -301,7 +301,6 @@ if( MSVC ) set(msvc_warning_flags # Disabled warnings. - -wd4146 # Suppress 'unary minus operator applied to unsigned type, result still unsigned' -wd4180 # Suppress 'qualifier applied to function type has no meaning; ignored' -wd4244 # Suppress ''argument' : conversion from 'type1' to 'type2', possible loss of data' -wd4258 # Suppress ''var' : definition from the for loop is ignored; the definition from the enclosing scope is used' diff --git a/docs/DXIL.rst b/docs/DXIL.rst index a1c5055085..1a2a691d27 100644 --- a/docs/DXIL.rst +++ b/docs/DXIL.rst @@ -2419,6 +2419,10 @@ ID Name Description 302 ReservedC9 reserved 303 RawBufferVectorLoad reads from a raw buffer and structured buffer 304 RawBufferVectorStore writes to a RWByteAddressBuffer or RWStructuredBuffer +305 MatVecMul Multiplies a MxK dimension matrix and a K sized input vector +306 MatVecMulAdd multiplies a MxK dimension matrix and a K sized input vector and adds an M-sized bias vector +307 OuterProductAccumulate Computes the outer product between column vectors and an MxN matrix is accumulated component-wise atomically (with device scope) in memory +308 VectorAccumulate Accumulates the components of a vector component-wise atomically (with device scope) to the corresponding elements of an array in memory === ===================================================== ======================================================================================================================================================================================================================= @@ -3065,287 +3069,299 @@ The set of validation rules that are known to hold for a DXIL program is identif .. hctdb_instrhelp.get_valrules_rst() .. 
VALRULES-RST:BEGIN -===================================================== ======================================================================================================================================================================================================================================================================================================== -Rule Code Description -===================================================== ======================================================================================================================================================================================================================================================================================================== -BITCODE.VALID Module must be bitcode-valid -CONTAINER.CONTENTINVALID DXIL Container Content is well-formed -CONTAINER.CONTENTMATCHES DXIL Container Content must match Module -CONTAINER.PARTINVALID DXIL Container must not contain unknown parts -CONTAINER.PARTMATCHES DXIL Container Parts must match Module -CONTAINER.PARTMISSING DXIL Container requires certain parts, corresponding to module -CONTAINER.PARTREPEATED DXIL Container must have only one of each part type -CONTAINER.ROOTSIGNATUREINCOMPATIBLE Root Signature in DXIL Container must be compatible with shader -CONTAINER.UNUSEDITEMINTABLE Items in Table must be used -DECL.ALLOCATERAYQUERY2FLAGSARECONST constRayFlags and RayQueryFlags for AllocateRayQuery2 must be constant -DECL.ALLOCATERAYQUERYFLAGSARECONST RayFlags for AllocateRayQuery must be constant -DECL.ALLOWOPACITYMICROMAPSEXPECTEDGIVENFORCEOMM2STATE When the ForceOMM2State ConstRayFlag is given as an argument to a RayQuery object, AllowOpacityMicromaps is expected as a RayQueryFlag argument -DECL.ATTRSTRUCT Attributes parameter must be struct type -DECL.DXILFNEXTERN External function must be a DXIL function -DECL.DXILNSRESERVED The DXIL reserved prefixes must only be used by built-in functions and types -DECL.EXTRAARGS Extra arguments not allowed for shader functions -DECL.FNATTRIBUTE Functions should only contain known function attributes -DECL.FNFLATTENPARAM Function parameters must not use struct types -DECL.FNISCALLED Functions can only be used by call instructions -DECL.MULTIPLENODEINPUTS A node shader may not have more than one input record -DECL.NODELAUNCHINPUTTYPE Invalid input record type for node launch type -DECL.NOTUSEDEXTERNAL External declaration should not be used -DECL.PARAMSTRUCT Callable function parameter must be struct type -DECL.PAYLOADSTRUCT Payload parameter must be struct type -DECL.RAYQUERYINFNSIG Rayquery objects not allowed in function signatures -DECL.RESOURCEINFNSIG Resources not allowed in function signatures -DECL.SHADERMISSINGARG payload/params/attributes parameter is required for certain shader types -DECL.SHADERRETURNVOID Shader functions must return void -DECL.USEDEXTERNALFUNCTION External function must be used -DECL.USEDINTERNAL Internal declaration must be used -FLOW.DEADLOOP Loop must have break. -FLOW.FUNCTIONCALL Function with parameter is not permitted -FLOW.NORECURSION Recursion is not permitted. -FLOW.REDUCIBLE Execution flow must be reducible. -INSTR.ALLOWED Instructions must be of an allowed type. -INSTR.ATOMICCONST Constant destination to atomic. -INSTR.ATOMICINTRINNONUAV Non-UAV destination to atomic intrinsic. -INSTR.ATOMICOPNONGROUPSHAREDORRECORD Non-groupshared or node record destination to atomic operation. 
-INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function. -INSTR.BARRIERFLAGINVALID Invalid %0 flags on DXIL operation '%1' -INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal). -INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional. -INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal. -INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant -INSTR.BARRIERREQUIRESNODE sync in a non-Node Shader must not sync node record memory. -INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true. -INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV. -INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature -INSTR.CANNOTPULLPOSITION pull-model evaluation of position disallowed -INSTR.CBUFFERCLASSFORCBUFFERHANDLE Expect Cbuffer for CBufferLoad handle. -INSTR.CBUFFEROUTOFBOUND Cbuffer access out of bound. -INSTR.CHECKACCESSFULLYMAPPED CheckAccessFullyMapped should only be used on resource status. -INSTR.CONSTALIGNFORRAWBUF Raw Buffer alignment value must be a constant. -INSTR.COORDINATECOUNTFORRAWTYPEDBUF raw/typed buffer offset must be undef. -INSTR.COORDINATECOUNTFORSTRUCTBUF structured buffer requires defined index and offset coordinates. -INSTR.CREATEHANDLEIMMRANGEID Local resource must map to global resource. -INSTR.DXILSTRUCTUSER Dxil struct types should only be used by ExtractValue. -INSTR.DXILSTRUCTUSEROUTOFBOUND Index out of bound when extract value from dxil struct types. -INSTR.EVALINTERPOLATIONMODE Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample. -INSTR.EXTRACTVALUE ExtractValue should only be used on dxil struct types and cmpxchg. -INSTR.FAILTORESLOVETGSMPOINTER TGSM pointers must originate from an unambiguous TGSM global variable. -INSTR.HANDLENOTFROMCREATEHANDLE Resource handle should returned by createHandle. -INSTR.ILLEGALDXILOPCODE DXILOpCode must be [0..%0]. %1 specified. -INSTR.ILLEGALDXILOPFUNCTION '%0' is not a DXILOpFuncition for DXILOpcode '%1'. -INSTR.IMMBIASFORSAMPLEB bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate. -INSTR.INBOUNDSACCESS Access to out-of-bounds memory is disallowed. -INSTR.MAYREORDERTHREADUNDEFCOHERENCEHINTPARAM Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. -INSTR.MINPRECISIONNOTPRECISE Instructions marked precise may not refer to minprecision values. -INSTR.MINPRECISONBITCAST Bitcast on minprecison types is not allowed. -INSTR.MIPLEVELFORGETDIMENSION Use mip level on buffer when GetDimensions. -INSTR.MIPONUAVLOAD uav load don't support mipLevel/sampleIndex. -INSTR.MISSINGSETMESHOUTPUTCOUNTS Missing SetMeshOutputCounts call. -INSTR.MULTIPLEGETMESHPAYLOAD GetMeshPayload cannot be called multiple times. -INSTR.MULTIPLESETMESHOUTPUTCOUNTS SetMeshOUtputCounts cannot be called multiple times. -INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE Invalid use of completed record handle. -INSTR.NOGENERICPTRADDRSPACECAST Address space cast between pointer types must have one part to be generic address space. -INSTR.NOIDIVBYZERO No signed integer division by zero. 
-INSTR.NOINDEFINITEACOS No indefinite arccosine. -INSTR.NOINDEFINITEASIN No indefinite arcsine. -INSTR.NOINDEFINITEDSXY No indefinite derivative calculation. -INSTR.NOINDEFINITELOG No indefinite logarithm. -INSTR.NONDOMINATINGDISPATCHMESH Non-Dominating DispatchMesh call. -INSTR.NONDOMINATINGSETMESHOUTPUTCOUNTS Non-Dominating SetMeshOutputCounts call. -INSTR.NOREADINGUNINITIALIZED Instructions should not read uninitialized value. -INSTR.NOTONCEDISPATCHMESH DispatchMesh must be called exactly once in an Amplification shader. -INSTR.NOUDIVBYZERO No unsigned integer division by zero. -INSTR.OFFSETONUAVLOAD uav load don't support offset. -INSTR.OLOAD DXIL intrinsic overload must be valid. -INSTR.ONLYONEALLOCCONSUME RWStructuredBuffers may increment or decrement their counters, but not both. -INSTR.OPCODERESERVED Instructions must not reference reserved opcodes. -INSTR.OPCONST DXIL intrinsic requires an immediate constant operand -INSTR.OPCONSTRANGE Constant values must be in-range for operation. -INSTR.OPERANDRANGE DXIL intrinsic operand must be within defined range -INSTR.PTRBITCAST Pointer type bitcast must be have same size. -INSTR.RESOURCECLASSFORLOAD load can only run on UAV/SRV resource. -INSTR.RESOURCECLASSFORSAMPLERGATHER sample, lod and gather should be on srv resource. -INSTR.RESOURCECLASSFORUAVSTORE store should be on uav resource. -INSTR.RESOURCECOORDINATEMISS coord uninitialized. -INSTR.RESOURCECOORDINATETOOMANY out of bound coord must be undef. -INSTR.RESOURCEKINDFORBUFFERLOADSTORE buffer load/store only works on Raw/Typed/StructuredBuffer. -INSTR.RESOURCEKINDFORCALCLOD lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray. -INSTR.RESOURCEKINDFORGATHER gather requires resource declared as texture/2D/Cube/2DArray/CubeArray. -INSTR.RESOURCEKINDFORGETDIM Invalid resource kind on GetDimensions. -INSTR.RESOURCEKINDFORSAMPLE sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray. -INSTR.RESOURCEKINDFORSAMPLEC samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray. -INSTR.RESOURCEKINDFORTEXTURELOAD texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray. -INSTR.RESOURCEKINDFORTEXTURESTORE texture store only works on Texture1D/1DArray/2D/2DArray/3D. -INSTR.RESOURCEKINDFORTRACERAY TraceRay should only use RTAccelerationStructure. -INSTR.RESOURCEMAPTOSINGLEENTRY Fail to map resource to resource table. -INSTR.RESOURCEOFFSETMISS offset uninitialized. -INSTR.RESOURCEOFFSETTOOMANY out of bound offset must be undef. -INSTR.RESOURCEUSER Resource should only be used by Load/GEP/Call. -INSTR.SAMPLECOMPTYPE sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT. -INSTR.SAMPLEINDEXFORLOAD2DMS load on Texture2DMS/2DMSArray require sampleIndex. -INSTR.SAMPLERMODEFORLOD lod instruction requires sampler declared in default mode. -INSTR.SAMPLERMODEFORSAMPLE sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode. -INSTR.SAMPLERMODEFORSAMPLEC sample_c_*/gather_c instructions require sampler declared in comparison mode. -INSTR.SIGNATUREOPERATIONNOTINENTRY Dxil operation for input output signature must be in entryPoints. -INSTR.STATUS Resource status should only be used by CheckAccessFullyMapped. -INSTR.STRUCTBITCAST Bitcast on struct types is not allowed. -INSTR.SVCONFLICTINGLAUNCHMODE Input system values are compatible with node shader launch mode. 
-INSTR.TEXTUREOFFSET offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7. -INSTR.TGSMRACECOND Race condition writing to shared memory detected, consider making this write conditional. -INSTR.UNDEFHITOBJECT HitObject is undef. -INSTR.UNDEFINEDVALUEFORUAVSTORE Assignment of undefined values to UAV. -INSTR.UNDEFRESULTFORGETDIMENSION GetDimensions used undef dimension %0 on %1. -INSTR.WRITEMASKFORTYPEDUAVSTORE store on typed uav must write to all four components of the UAV. -INSTR.WRITEMASKGAPFORUAV UAV write mask must be contiguous, starting at x: .x, .xy, .xyz, or .xyzw. -INSTR.WRITEMASKMATCHVALUEFORUAVSTORE uav store write mask must match store value mask, write mask is %0 and store value mask is %1. -META.BARYCENTRICSFLOAT3 only 'float3' type is allowed for SV_Barycentrics. -META.BARYCENTRICSINTERPOLATION SV_Barycentrics cannot be used with 'nointerpolation' type. -META.BARYCENTRICSTWOPERSPECTIVES There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode. -META.BRANCHFLATTEN Can't use branch and flatten attributes together. -META.CLIPCULLMAXCOMPONENTS Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components -META.CLIPCULLMAXROWS Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows. -META.COMPUTEWITHNODE Compute entry must not have node metadata -META.CONTROLFLOWHINTNOTONCONTROLFLOW Control flow hint only works on control flow inst. -META.DENSERESIDS Resource identifiers must be zero-based and dense. -META.DUPLICATESYSVALUE System value may only appear once in signature -META.ENTRYFUNCTION entrypoint not found. -META.FLAGSUSAGE Flags must match usage. -META.FORCECASEONSWITCH Attribute forcecase only works for switch. -META.GLCNOTONAPPENDCONSUME globallycoherent cannot be used with append/consume buffers: '%0'. -META.INTEGERINTERPMODE Interpolation mode on integer must be Constant -META.INTERPMODEINONEROW Interpolation mode must be identical for all elements packed into the same row. -META.INTERPMODEVALID Interpolation mode must be valid -META.INVALIDCONTROLFLOWHINT Invalid control flow hint. -META.KNOWN Named metadata should be known -META.MAXTESSFACTOR Hull Shader MaxTessFactor must be [%0..%1]. %2 specified. -META.NOENTRYPROPSFORENTRY Entry point %0 must have entry properties. -META.NOSEMANTICOVERLAP Semantics must not overlap -META.REQUIRED Required metadata missing. -META.SEMAKINDMATCHESNAME Semantic name must match system value, when defined. -META.SEMAKINDVALID Semantic kind must be valid -META.SEMANTICCOMPTYPE %0 must be %1. -META.SEMANTICINDEXMAX System value semantics have a maximum valid semantic index -META.SEMANTICLEN Semantic length must be at least 1 and at most 64. -META.SEMANTICSHOULDBEALLOCATED Semantic should have a valid packing location -META.SEMANTICSHOULDNOTBEALLOCATED Semantic should have a packing location of -1 -META.SIGNATURECOMPTYPE signature %0 specifies unrecognized or invalid component type. -META.SIGNATUREDATAWIDTH Data width must be identical for all elements packed into the same row. -META.SIGNATUREILLEGALCOMPONENTORDER Component ordering for packed elements must be: arbitrary < system value < system generated value -META.SIGNATUREINDEXCONFLICT Only elements with compatible indexing rules may be packed together -META.SIGNATUREOUTOFRANGE Signature elements must fit within maximum signature size -META.SIGNATUREOVERLAP Signature elements may not overlap in packing location. 
-META.STRUCTBUFALIGNMENT StructuredBuffer stride not aligned -META.STRUCTBUFALIGNMENTOUTOFBOUND StructuredBuffer stride out of bounds -META.SYSTEMVALUEROWS System value may only have 1 row -META.TARGET Target triple must be 'dxil-ms-dx' -META.TESSELLATOROUTPUTPRIMITIVE Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW. -META.TESSELLATORPARTITION Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even. -META.TEXTURETYPE elements of typed buffers and textures must fit in four 32-bit quantities. -META.USED All metadata must be used by dxil. -META.VALIDSAMPLERMODE Invalid sampler mode on sampler . -META.VALUERANGE Metadata value must be within range. -META.VERSIONSUPPORTED Version in metadata must be supported. -META.WELLFORMED Metadata must be well-formed in operand count and types. -SM.64BITRAWBUFFERLOADSTORE i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3. -SM.AMPLIFICATIONSHADERPAYLOADSIZE For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. -SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. -SM.APPENDANDCONSUMEONSAMEUAV BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1. -SM.CBUFFERARRAYOFFSETALIGNMENT CBuffer array offset must be aligned to 16-bytes -SM.CBUFFERELEMENTOVERFLOW CBuffer elements must not overflow -SM.CBUFFEROFFSETOVERLAP CBuffer offsets must not overlap -SM.CBUFFERSIZE CBuffer size must not exceed 65536 bytes -SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT D3D12 constant/texture buffer template element can only be a struct. -SM.COMPLETEPOSITION Not all elements of SV_Position were written. -SM.CONSTANTINTERPMODE Interpolation mode must be constant for MS primitive output. -SM.COUNTERONLYONSTRUCTBUF BufferUpdateCounter valid only on structured buffers. -SM.CSNOSIGNATURES Compute shaders must not have shader signatures. -SM.DOMAINLOCATIONIDXOOB DomainLocation component index out of bounds for the domain. -SM.DSINPUTCONTROLPOINTCOUNTRANGE DS input control point count must be [0..%0]. %1 specified. -SM.DXILVERSION Target shader model requires specific Dxil Version -SM.GSINSTANCECOUNTRANGE GS instance count must be [1..%0]. %1 specified. -SM.GSOUTPUTVERTEXCOUNTRANGE GS output vertex count must be [0..%0]. %1 specified. -SM.GSTOTALOUTPUTVERTEXDATARANGE Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2. This value cannot be greater than %3. -SM.GSVALIDINPUTPRIMITIVE GS input primitive unrecognized. -SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY GS output primitive topology unrecognized. -SM.HSINPUTCONTROLPOINTCOUNTRANGE HS input control point count must be [0..%0]. %1 specified. 
-SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH For pass thru hull shader, input control point count must match output control point count -SM.INCOMPATIBLECALLINENTRY Features used in internal function calls must be compatible with entry -SM.INCOMPATIBLEDERIVINCOMPUTESHADERMODEL Derivatives in compute-model shaders require shader model 6.6 and above -SM.INCOMPATIBLEDERIVLAUNCH Node shaders only support derivatives in broadcasting launch mode -SM.INCOMPATIBLEOPERATION Operations used in entry function must be compatible with shader stage and other properties -SM.INCOMPATIBLEREQUIRESGROUP Functions requiring groupshared memory must be called from shaders with a visible group -SM.INCOMPATIBLESHADERMODEL Functions may only use features available in the current shader model -SM.INCOMPATIBLESTAGE Functions may only use features available in the entry function's stage -SM.INCOMPATIBLETHREADGROUPDIM When derivatives are used in compute-model shaders, the thread group dimensions must be compatible -SM.INSIDETESSFACTORSIZEMATCHDOMAIN InsideTessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. -SM.INVALIDRESOURCECOMPTYPE Invalid resource return type. -SM.INVALIDRESOURCEKIND Invalid resources kind. -SM.INVALIDSAMPLERFEEDBACKTYPE Invalid sampler feedback type. -SM.INVALIDTEXTUREKINDONUAV TextureCube[Array] resources are not supported with UAVs. -SM.ISOLINEOUTPUTPRIMITIVEMISMATCH Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain. -SM.MAXMSSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. -SM.MAXTGSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. -SM.MAXTHEADGROUP Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1. -SM.MESHPSIGROWCOUNT For shader '%0', primitive output signatures are taking up more than %1 rows. -SM.MESHSHADERINOUTSIZE For shader '%0', payload plus output size is greater than %1. -SM.MESHSHADERMAXPRIMITIVECOUNT MS max primitive output count must be [0..%0]. %1 specified. -SM.MESHSHADERMAXVERTEXCOUNT MS max vertex output count must be [0..%0]. %1 specified. -SM.MESHSHADEROUTPUTSIZE For shader '%0', vertex plus primitive output size is greater than %1. -SM.MESHSHADERPAYLOADSIZE For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. -SM.MESHSHADERPAYLOADSIZEDECLARED For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. -SM.MESHTOTALSIGROWCOUNT For shader '%0', vertex and primitive output signatures are taking up more than %1 rows. -SM.MESHVSIGROWCOUNT For shader '%0', vertex output signatures are taking up more than %1 rows. -SM.MULTISTREAMMUSTBEPOINT When multiple GS output streams are used they must be pointlists -SM.NAME Target shader model name must be known -SM.NOINTERPMODE Interpolation mode must be undefined for VS input/PS output/patch constant. -SM.NOPSOUTPUTIDX Pixel shader output registers are not indexable. -SM.OPCODE Opcode must be defined in target shader model -SM.OPCODEININVALIDFUNCTION Invalid DXIL opcode usage like StorePatchConstant in patch constant function -SM.OPERAND Operand must be defined in target shader model. -SM.OUTPUTCONTROLPOINTCOUNTRANGE output control point count must be [%0..%1]. %2 specified. -SM.OUTPUTCONTROLPOINTSTOTALSCALARS Total number of scalars across all HS output control points must not exceed . -SM.PATCHCONSTANTONLYFORHSDS patch constant signature only valid in HS and DS. 
-SM.PROGRAMVERSION Program Version in Dxil Container does not match Dxil Module shader model version -SM.PSCONSISTENTINTERP Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample). -SM.PSCOVERAGEANDINNERCOVERAGE InnerCoverage and Coverage are mutually exclusive. -SM.PSMULTIPLEDEPTHSEMANTIC Pixel Shader only allows one type of depth semantic to be declared. -SM.PSOUTPUTSEMANTIC Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found. -SM.PSTARGETCOL0 SV_Target packed location must start at column 0. -SM.PSTARGETINDEXMATCHESROW SV_Target semantic index must match packed row location. -SM.RAYSHADERPAYLOADSIZE For shader '%0', %1 size is smaller than argument's allocation size. -SM.RAYSHADERSIGNATURES Ray tracing shader '%0' should not have any shader signatures. -SM.RESOURCERANGEOVERLAP Resource ranges must not overlap -SM.ROVONLYINPS RasterizerOrdered objects are only allowed in 5.0+ pixel shaders. -SM.SAMPLECOUNTONLYON2DMS Only Texture2DMS/2DMSArray could has sample count. -SM.SEMANTIC Semantic must be defined in target shader model -SM.STREAMINDEXRANGE Stream index (%0) must between 0 and %1. -SM.TESSFACTORFORDOMAIN Required TessFactor for domain not found declared anywhere in Patch Constant data. -SM.TESSFACTORSIZEMATCHDOMAIN TessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. -SM.TGSMUNSUPPORTED Thread Group Shared Memory not supported %0. -SM.THREADGROUPCHANNELRANGE Declared Thread Group %0 size %1 outside valid range [%2..%3]. -SM.TRIOUTPUTPRIMITIVEMISMATCH Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain. -SM.UNDEFINEDOUTPUT Not all elements of output %0 were written. -SM.VALIDDOMAIN Invalid Tessellator Domain specified. Must be isoline, tri or quad. -SM.VIEWIDNEEDSSLOT ViewID requires compatible space in pixel shader input signature -SM.WAVESIZEALLZEROWHENUNDEFINED WaveSize Max and Preferred must be 0 when Min is 0 -SM.WAVESIZEEXPECTSONEPARAM WaveSize tag expects exactly 1 parameter. -SM.WAVESIZEMAXANDPREFERREDZEROWHENNORANGE WaveSize Max and Preferred must be 0 to encode min==max -SM.WAVESIZEMAXGREATERTHANMIN WaveSize Max must greater than Min -SM.WAVESIZENEEDSCONSTANTOPERANDS WaveSize metadata operands must be constant values. -SM.WAVESIZENEEDSSM66OR67 WaveSize is valid only for Shader Model 6.6 and 6.7. -SM.WAVESIZEONCOMPUTEORNODE WaveSize only allowed on compute or node shaders -SM.WAVESIZEPREFERREDINRANGE WaveSize Preferred must be within Min..Max range -SM.WAVESIZERANGEEXPECTSTHREEPARAMS WaveSize Range tag expects exactly 3 parameters. -SM.WAVESIZERANGENEEDSSM68PLUS WaveSize Range is valid only for Shader Model 6.8 and higher. -SM.WAVESIZETAGDUPLICATE WaveSize or WaveSizeRange tag may only appear once per entry point. -SM.WAVESIZEVALUE WaveSize value must be a power of 2 in range [4..128] -SM.ZEROHSINPUTCONTROLPOINTWITHINPUT When HS input control point count is 0, no input signature should exist. -TYPES.DEFINED Type must be defined based on DXIL primitives -TYPES.I8 I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics. 
-TYPES.INTWIDTH Int type must be of valid width -TYPES.NOMULTIDIM Only one dimension allowed for array type. -TYPES.NOPTRTOPTR Pointers to pointers, or pointers in structures are not allowed. -TYPES.NOVECTOR Vector types must not be present -===================================================== ======================================================================================================================================================================================================================================================================================================== +============================================================= ======================================================================================================================================================================================================================================================================================================== +Rule Code Description +============================================================= ======================================================================================================================================================================================================================================================================================================== +BITCODE.VALID Module must be bitcode-valid +CONTAINER.CONTENTINVALID DXIL Container Content is well-formed +CONTAINER.CONTENTMATCHES DXIL Container Content must match Module +CONTAINER.PARTINVALID DXIL Container must not contain unknown parts +CONTAINER.PARTMATCHES DXIL Container Parts must match Module +CONTAINER.PARTMISSING DXIL Container requires certain parts, corresponding to module +CONTAINER.PARTREPEATED DXIL Container must have only one of each part type +CONTAINER.ROOTSIGNATUREINCOMPATIBLE Root Signature in DXIL Container must be compatible with shader +CONTAINER.UNUSEDITEMINTABLE Items in Table must be used +DECL.ALLOCATERAYQUERY2FLAGSARECONST constRayFlags and RayQueryFlags for AllocateRayQuery2 must be constant +DECL.ALLOCATERAYQUERYFLAGSARECONST RayFlags for AllocateRayQuery must be constant +DECL.ALLOWOPACITYMICROMAPSEXPECTEDGIVENFORCEOMM2STATE When the ForceOMM2State ConstRayFlag is given as an argument to a RayQuery object, AllowOpacityMicromaps is expected as a RayQueryFlag argument +DECL.ATTRSTRUCT Attributes parameter must be struct type +DECL.DXILFNEXTERN External function must be a DXIL function +DECL.DXILNSRESERVED The DXIL reserved prefixes must only be used by built-in functions and types +DECL.EXTRAARGS Extra arguments not allowed for shader functions +DECL.FNATTRIBUTE Functions should only contain known function attributes +DECL.FNFLATTENPARAM Function parameters must not use struct types +DECL.FNISCALLED Functions can only be used by call instructions +DECL.MULTIPLENODEINPUTS A node shader may not have more than one input record +DECL.NODELAUNCHINPUTTYPE Invalid input record type for node launch type +DECL.NOTUSEDEXTERNAL External declaration should not be used +DECL.PARAMSTRUCT Callable function parameter must be struct type +DECL.PAYLOADSTRUCT Payload parameter must be struct type +DECL.RAYQUERYINFNSIG Rayquery objects not allowed in function signatures +DECL.RESOURCEINFNSIG Resources not allowed in function signatures +DECL.SHADERMISSINGARG payload/params/attributes parameter is required for certain shader types +DECL.SHADERRETURNVOID Shader functions must return void +DECL.USEDEXTERNALFUNCTION External function must be used +DECL.USEDINTERNAL 
Internal declaration must be used +FLOW.DEADLOOP Loop must have break. +FLOW.FUNCTIONCALL Function with parameter is not permitted +FLOW.NORECURSION Recursion is not permitted. +FLOW.REDUCIBLE Execution flow must be reducible. +INSTR.ALLOWED Instructions must be of an allowed type. +INSTR.ATOMICCONST Constant destination to atomic. +INSTR.ATOMICINTRINNONUAV Non-UAV destination to atomic intrinsic. +INSTR.ATOMICOPNONGROUPSHAREDORRECORD Non-groupshared or node record destination to atomic operation. +INSTR.ATTRIBUTEATVERTEXNOINTERPOLATION Attribute %0 must have nointerpolation mode in order to use GetAttributeAtVertex function. +INSTR.BARRIERFLAGINVALID Invalid %0 flags on DXIL operation '%1' +INSTR.BARRIERMODEFORNONCS sync in a non-Compute/Amplification/Mesh/Node Shader must only sync UAV (sync_uglobal). +INSTR.BARRIERMODENOMEMORY sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory). Only _t (thread group sync) is optional. +INSTR.BARRIERMODEUSELESSUGROUP sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal. +INSTR.BARRIERNONCONSTANTFLAGARGUMENT Memory type, access, or sync flag is not constant +INSTR.BARRIERREQUIRESNODE sync in a non-Node Shader must not sync node record memory. +INSTR.BUFFERUPDATECOUNTERONRESHASCOUNTER BufferUpdateCounter valid only when HasCounter is true. +INSTR.BUFFERUPDATECOUNTERONUAV BufferUpdateCounter valid only on UAV. +INSTR.CALLOLOAD Call to DXIL intrinsic must match overload signature +INSTR.CANNOTPULLPOSITION pull-model evaluation of position disallowed +INSTR.CBUFFERCLASSFORCBUFFERHANDLE Expect Cbuffer for CBufferLoad handle. +INSTR.CBUFFEROUTOFBOUND Cbuffer access out of bound. +INSTR.CHECKACCESSFULLYMAPPED CheckAccessFullyMapped should only be used on resource status. +INSTR.CONSTALIGNFORRAWBUF Raw Buffer alignment value must be a constant. +INSTR.COORDINATECOUNTFORRAWTYPEDBUF raw/typed buffer offset must be undef. +INSTR.COORDINATECOUNTFORSTRUCTBUF structured buffer requires defined index and offset coordinates. +INSTR.CREATEHANDLEIMMRANGEID Local resource must map to global resource. +INSTR.DXILSTRUCTUSER Dxil struct types should only be used by ExtractValue. +INSTR.DXILSTRUCTUSEROUTOFBOUND Index out of bound when extract value from dxil struct types. +INSTR.EVALINTERPOLATIONMODE Interpolation mode on %0 used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample. +INSTR.EXTRACTVALUE ExtractValue should only be used on dxil struct types and cmpxchg. +INSTR.FAILTORESLOVETGSMPOINTER TGSM pointers must originate from an unambiguous TGSM global variable. +INSTR.HANDLENOTFROMCREATEHANDLE Resource handle should returned by createHandle. +INSTR.ILLEGALDXILOPCODE DXILOpCode must be [0..%0]. %1 specified. +INSTR.ILLEGALDXILOPFUNCTION '%0' is not a DXILOpFuncition for DXILOpcode '%1'. +INSTR.IMMBIASFORSAMPLEB bias amount for sample_b must be in the range [%0,%1], but %2 was specified as an immediate. +INSTR.INBOUNDSACCESS Access to out-of-bounds memory is disallowed. +INSTR.LINALGINTERPRETATIONPARAMARECONST In Linalg operations, Interpretation value is a constant. +INSTR.LINALGINVALIDMATRIXLAYOUTVALUEFORMATVECOPS Matrix Layout for Linalg Mul/MulAdd operation must be valid. +INSTR.LINALGINVALIDMATRIXLAYOUTVALUEFOROUTERPRODUCTACCUMULATE Matrix Layout for Linalg Mul/MulAdd operation must be valid. +INSTR.LINALGINVALIDMEMORYINTERPVALUE In Memory Interpolation value must be valid. 
+INSTR.LINALGINVALIDREGISTERINTERPVALUE From Register Interpretation value must be valid. +INSTR.LINALGMATRIXLAYOUTNOTTRANSPOSABLE Row Major and Column Major matrix layouts are not transposable. +INSTR.LINALGMATRIXSHAPEPARAMSARECONST Matrix Layout, Dimensions and isTranspose are constants +INSTR.LINALGMATRIXSTRIDEZEROFOROPTIMALLAYOUTS For optimal layouts, matrix stride must be zero. +INSTR.LINALGNOTANUNSIGNEDTYPE Unsigned flag set for a float signed type +INSTR.MATVECOPISUNSIGNEDFLAGSARECONST In Linalg Mul/MulAdd functions, IsUnsigned flag is a constant. +INSTR.MAYREORDERTHREADUNDEFCOHERENCEHINTPARAM Use of undef coherence hint or num coherence hint bits in MaybeReorderThread. +INSTR.MINPRECISIONNOTPRECISE Instructions marked precise may not refer to minprecision values. +INSTR.MINPRECISONBITCAST Bitcast on minprecison types is not allowed. +INSTR.MIPLEVELFORGETDIMENSION Use mip level on buffer when GetDimensions. +INSTR.MIPONUAVLOAD uav load don't support mipLevel/sampleIndex. +INSTR.MISSINGSETMESHOUTPUTCOUNTS Missing SetMeshOutputCounts call. +INSTR.MULTIPLEGETMESHPAYLOAD GetMeshPayload cannot be called multiple times. +INSTR.MULTIPLESETMESHOUTPUTCOUNTS SetMeshOUtputCounts cannot be called multiple times. +INSTR.NODERECORDHANDLEUSEAFTERCOMPLETE Invalid use of completed record handle. +INSTR.NOGENERICPTRADDRSPACECAST Address space cast between pointer types must have one part to be generic address space. +INSTR.NOIDIVBYZERO No signed integer division by zero. +INSTR.NOINDEFINITEACOS No indefinite arccosine. +INSTR.NOINDEFINITEASIN No indefinite arcsine. +INSTR.NOINDEFINITEDSXY No indefinite derivative calculation. +INSTR.NOINDEFINITELOG No indefinite logarithm. +INSTR.NONDOMINATINGDISPATCHMESH Non-Dominating DispatchMesh call. +INSTR.NONDOMINATINGSETMESHOUTPUTCOUNTS Non-Dominating SetMeshOutputCounts call. +INSTR.NOREADINGUNINITIALIZED Instructions should not read uninitialized value. +INSTR.NOTONCEDISPATCHMESH DispatchMesh must be called exactly once in an Amplification shader. +INSTR.NOUDIVBYZERO No unsigned integer division by zero. +INSTR.OFFSETONUAVLOAD uav load don't support offset. +INSTR.OLOAD DXIL intrinsic overload must be valid. +INSTR.ONLYONEALLOCCONSUME RWStructuredBuffers may increment or decrement their counters, but not both. +INSTR.OPCODERESERVED Instructions must not reference reserved opcodes. +INSTR.OPCONST DXIL intrinsic requires an immediate constant operand +INSTR.OPCONSTRANGE Constant values must be in-range for operation. +INSTR.OPERANDRANGE DXIL intrinsic operand must be within defined range +INSTR.PARAMMULTIPLE Parameter must be a valid multiple +INSTR.PTRBITCAST Pointer type bitcast must be have same size. +INSTR.REORDERCOHERENTREQUIRESSM69 reordercoherent requires SM 6.9 or later. +INSTR.RESOURCECLASSFORLOAD load can only run on UAV/SRV resource. +INSTR.RESOURCECLASSFORSAMPLERGATHER sample, lod and gather should be on srv resource. +INSTR.RESOURCECLASSFORUAVSTORE store should be on uav resource. +INSTR.RESOURCECOORDINATEMISS coord uninitialized. +INSTR.RESOURCECOORDINATETOOMANY out of bound coord must be undef. +INSTR.RESOURCEKINDFORBUFFERLOADSTORE buffer load/store only works on Raw/Typed/StructuredBuffer. +INSTR.RESOURCEKINDFORCALCLOD lod requires resource declared as texture1D/2D/3D/Cube/CubeArray/1DArray/2DArray. +INSTR.RESOURCEKINDFORGATHER gather requires resource declared as texture/2D/Cube/2DArray/CubeArray. +INSTR.RESOURCEKINDFORGETDIM Invalid resource kind on GetDimensions. 
+INSTR.RESOURCEKINDFORSAMPLE sample/_l/_d requires resource declared as texture1D/2D/3D/Cube/1DArray/2DArray/CubeArray. +INSTR.RESOURCEKINDFORSAMPLEC samplec requires resource declared as texture1D/2D/Cube/1DArray/2DArray/CubeArray. +INSTR.RESOURCEKINDFORTEXTURELOAD texture load only works on Texture1D/1DArray/2D/2DArray/3D/MS2D/MS2DArray. +INSTR.RESOURCEKINDFORTEXTURESTORE texture store only works on Texture1D/1DArray/2D/2DArray/3D. +INSTR.RESOURCEKINDFORTRACERAY TraceRay should only use RTAccelerationStructure. +INSTR.RESOURCEMAPTOSINGLEENTRY Fail to map resource to resource table. +INSTR.RESOURCEOFFSETMISS offset uninitialized. +INSTR.RESOURCEOFFSETTOOMANY out of bound offset must be undef. +INSTR.RESOURCEUSER Resource should only be used by Load/GEP/Call. +INSTR.SAMPLECOMPTYPE sample_* instructions require resource to be declared to return UNORM, SNORM or FLOAT. +INSTR.SAMPLEINDEXFORLOAD2DMS load on Texture2DMS/2DMSArray require sampleIndex. +INSTR.SAMPLERMODEFORLOD lod instruction requires sampler declared in default mode. +INSTR.SAMPLERMODEFORSAMPLE sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode. +INSTR.SAMPLERMODEFORSAMPLEC sample_c_*/gather_c instructions require sampler declared in comparison mode. +INSTR.SIGNATUREOPERATIONNOTINENTRY Dxil operation for input output signature must be in entryPoints. +INSTR.STATUS Resource status should only be used by CheckAccessFullyMapped. +INSTR.STRUCTBITCAST Bitcast on struct types is not allowed. +INSTR.SVCONFLICTINGLAUNCHMODE Input system values are compatible with node shader launch mode. +INSTR.TEXTUREOFFSET offset texture instructions must take offset which can resolve to integer literal in the range -8 to 7. +INSTR.TGSMRACECOND Race condition writing to shared memory detected, consider making this write conditional. +INSTR.UNDEFHITOBJECT HitObject is undef. +INSTR.UNDEFINEDVALUEFORUAVSTORE Assignment of undefined values to UAV. +INSTR.UNDEFRESULTFORGETDIMENSION GetDimensions used undef dimension %0 on %1. +INSTR.WRITEMASKFORTYPEDUAVSTORE store on typed uav must write to all four components of the UAV. +INSTR.WRITEMASKGAPFORUAV UAV write mask must be contiguous, starting at x: .x, .xy, .xyz, or .xyzw. +INSTR.WRITEMASKMATCHVALUEFORUAVSTORE uav store write mask must match store value mask, write mask is %0 and store value mask is %1. +META.BARYCENTRICSFLOAT3 only 'float3' type is allowed for SV_Barycentrics. +META.BARYCENTRICSINTERPOLATION SV_Barycentrics cannot be used with 'nointerpolation' type. +META.BARYCENTRICSTWOPERSPECTIVES There can only be up to two input attributes of SV_Barycentrics with different perspective interpolation mode. +META.BRANCHFLATTEN Can't use branch and flatten attributes together. +META.CLIPCULLMAXCOMPONENTS Combined elements of SV_ClipDistance and SV_CullDistance must fit in 8 components +META.CLIPCULLMAXROWS Combined elements of SV_ClipDistance and SV_CullDistance must fit in two rows. +META.COHERENCENOTONAPPENDCONSUME globally/reorder coherent incompatible with append/consume/counter buffers +META.COMPUTEWITHNODE Compute entry must not have node metadata +META.CONTROLFLOWHINTNOTONCONTROLFLOW Control flow hint only works on control flow inst. +META.DENSERESIDS Resource identifiers must be zero-based and dense. +META.DUPLICATESYSVALUE System value may only appear once in signature +META.ENTRYFUNCTION entrypoint not found. +META.FLAGSUSAGE Flags must match usage. +META.FORCECASEONSWITCH Attribute forcecase only works for switch. 
+META.INTEGERINTERPMODE Interpolation mode on integer must be Constant +META.INTERPMODEINONEROW Interpolation mode must be identical for all elements packed into the same row. +META.INTERPMODEVALID Interpolation mode must be valid +META.INVALIDCONTROLFLOWHINT Invalid control flow hint. +META.KNOWN Named metadata should be known +META.MAXTESSFACTOR Hull Shader MaxTessFactor must be [%0..%1]. %2 specified. +META.NOENTRYPROPSFORENTRY Entry point %0 must have entry properties. +META.NOSEMANTICOVERLAP Semantics must not overlap +META.REQUIRED Required metadata missing. +META.SEMAKINDMATCHESNAME Semantic name must match system value, when defined. +META.SEMAKINDVALID Semantic kind must be valid +META.SEMANTICCOMPTYPE %0 must be %1. +META.SEMANTICINDEXMAX System value semantics have a maximum valid semantic index +META.SEMANTICLEN Semantic length must be at least 1 and at most 64. +META.SEMANTICSHOULDBEALLOCATED Semantic should have a valid packing location +META.SEMANTICSHOULDNOTBEALLOCATED Semantic should have a packing location of -1 +META.SIGNATURECOMPTYPE signature %0 specifies unrecognized or invalid component type. +META.SIGNATUREDATAWIDTH Data width must be identical for all elements packed into the same row. +META.SIGNATUREILLEGALCOMPONENTORDER Component ordering for packed elements must be: arbitrary < system value < system generated value +META.SIGNATUREINDEXCONFLICT Only elements with compatible indexing rules may be packed together +META.SIGNATUREOUTOFRANGE Signature elements must fit within maximum signature size +META.SIGNATUREOVERLAP Signature elements may not overlap in packing location. +META.STRUCTBUFALIGNMENT StructuredBuffer stride not aligned +META.STRUCTBUFALIGNMENTOUTOFBOUND StructuredBuffer stride out of bounds +META.SYSTEMVALUEROWS System value may only have 1 row +META.TARGET Target triple must be 'dxil-ms-dx' +META.TESSELLATOROUTPUTPRIMITIVE Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW. +META.TESSELLATORPARTITION Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even. +META.TEXTURETYPE elements of typed buffers and textures must fit in four 32-bit quantities. +META.USED All metadata must be used by dxil. +META.VALIDSAMPLERMODE Invalid sampler mode on sampler . +META.VALUERANGE Metadata value must be within range. +META.VERSIONSUPPORTED Version in metadata must be supported. +META.WELLFORMED Metadata must be well-formed in operand count and types. +SM.64BITRAWBUFFERLOADSTORE i64/f64 rawBufferLoad/Store overloads are allowed after SM 6.3. +SM.AMPLIFICATIONSHADERPAYLOADSIZE For amplification shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. +SM.AMPLIFICATIONSHADERPAYLOADSIZEDECLARED For amplification shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. +SM.APPENDANDCONSUMEONSAMEUAV BufferUpdateCounter inc and dec on a given UAV (%d) cannot both be in the same shader for shader model less than 5.1. +SM.CBUFFERARRAYOFFSETALIGNMENT CBuffer array offset must be aligned to 16-bytes +SM.CBUFFERELEMENTOVERFLOW CBuffer elements must not overflow +SM.CBUFFEROFFSETOVERLAP CBuffer offsets must not overlap +SM.CBUFFERSIZE CBuffer size must not exceed 65536 bytes +SM.CBUFFERTEMPLATETYPEMUSTBESTRUCT D3D12 constant/texture buffer template element can only be a struct. +SM.COMPLETEPOSITION Not all elements of SV_Position were written. +SM.CONSTANTINTERPMODE Interpolation mode must be constant for MS primitive output. 
+SM.COUNTERONLYONSTRUCTBUF BufferUpdateCounter valid only on structured buffers. +SM.CSNOSIGNATURES Compute shaders must not have shader signatures. +SM.DOMAINLOCATIONIDXOOB DomainLocation component index out of bounds for the domain. +SM.DSINPUTCONTROLPOINTCOUNTRANGE DS input control point count must be [0..%0]. %1 specified. +SM.DXILVERSION Target shader model requires specific Dxil Version +SM.GSINSTANCECOUNTRANGE GS instance count must be [1..%0]. %1 specified. +SM.GSOUTPUTVERTEXCOUNTRANGE GS output vertex count must be [0..%0]. %1 specified. +SM.GSTOTALOUTPUTVERTEXDATARANGE Declared output vertex count (%0) multiplied by the total number of declared scalar components of output data (%1) equals %2. This value cannot be greater than %3. +SM.GSVALIDINPUTPRIMITIVE GS input primitive unrecognized. +SM.GSVALIDOUTPUTPRIMITIVETOPOLOGY GS output primitive topology unrecognized. +SM.HSINPUTCONTROLPOINTCOUNTRANGE HS input control point count must be [0..%0]. %1 specified. +SM.HULLPASSTHRUCONTROLPOINTCOUNTMATCH For pass thru hull shader, input control point count must match output control point count +SM.INCOMPATIBLECALLINENTRY Features used in internal function calls must be compatible with entry +SM.INCOMPATIBLEDERIVINCOMPUTESHADERMODEL Derivatives in compute-model shaders require shader model 6.6 and above +SM.INCOMPATIBLEDERIVLAUNCH Node shaders only support derivatives in broadcasting launch mode +SM.INCOMPATIBLEOPERATION Operations used in entry function must be compatible with shader stage and other properties +SM.INCOMPATIBLEREQUIRESGROUP Functions requiring groupshared memory must be called from shaders with a visible group +SM.INCOMPATIBLESHADERMODEL Functions may only use features available in the current shader model +SM.INCOMPATIBLESTAGE Functions may only use features available in the entry function's stage +SM.INCOMPATIBLETHREADGROUPDIM When derivatives are used in compute-model shaders, the thread group dimensions must be compatible +SM.INSIDETESSFACTORSIZEMATCHDOMAIN InsideTessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. +SM.INVALIDRESOURCECOMPTYPE Invalid resource return type. +SM.INVALIDRESOURCEKIND Invalid resources kind. +SM.INVALIDSAMPLERFEEDBACKTYPE Invalid sampler feedback type. +SM.INVALIDTEXTUREKINDONUAV TextureCube[Array] resources are not supported with UAVs. +SM.ISOLINEOUTPUTPRIMITIVEMISMATCH Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain. +SM.MAXMSSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. +SM.MAXTGSMSIZE Total Thread Group Shared Memory storage is %0, exceeded %1. +SM.MAXTHEADGROUP Declared Thread Group Count %0 (X*Y*Z) is beyond the valid maximum of %1. +SM.MESHPSIGROWCOUNT For shader '%0', primitive output signatures are taking up more than %1 rows. +SM.MESHSHADERINOUTSIZE For shader '%0', payload plus output size is greater than %1. +SM.MESHSHADERMAXPRIMITIVECOUNT MS max primitive output count must be [0..%0]. %1 specified. +SM.MESHSHADERMAXVERTEXCOUNT MS max vertex output count must be [0..%0]. %1 specified. +SM.MESHSHADEROUTPUTSIZE For shader '%0', vertex plus primitive output size is greater than %1. +SM.MESHSHADERPAYLOADSIZE For mesh shader with entry '%0', payload size %1 is greater than maximum size of %2 bytes. +SM.MESHSHADERPAYLOADSIZEDECLARED For mesh shader with entry '%0', payload size %1 is greater than declared size of %2 bytes. 
+SM.MESHTOTALSIGROWCOUNT For shader '%0', vertex and primitive output signatures are taking up more than %1 rows. +SM.MESHVSIGROWCOUNT For shader '%0', vertex output signatures are taking up more than %1 rows. +SM.MULTISTREAMMUSTBEPOINT When multiple GS output streams are used they must be pointlists +SM.NAME Target shader model name must be known +SM.NOINTERPMODE Interpolation mode must be undefined for VS input/PS output/patch constant. +SM.NOPSOUTPUTIDX Pixel shader output registers are not indexable. +SM.OPCODE Opcode must be defined in target shader model +SM.OPCODEININVALIDFUNCTION Invalid DXIL opcode usage like StorePatchConstant in patch constant function +SM.OPERAND Operand must be defined in target shader model. +SM.OUTPUTCONTROLPOINTCOUNTRANGE output control point count must be [%0..%1]. %2 specified. +SM.OUTPUTCONTROLPOINTSTOTALSCALARS Total number of scalars across all HS output control points must not exceed . +SM.PATCHCONSTANTONLYFORHSDS patch constant signature only valid in HS and DS. +SM.PROGRAMVERSION Program Version in Dxil Container does not match Dxil Module shader model version +SM.PSCONSISTENTINTERP Interpolation mode for PS input position must be linear_noperspective_centroid or linear_noperspective_sample when outputting oDepthGE or oDepthLE and not running at sample frequency (which is forced by inputting SV_SampleIndex or declaring an input linear_sample or linear_noperspective_sample). +SM.PSCOVERAGEANDINNERCOVERAGE InnerCoverage and Coverage are mutually exclusive. +SM.PSMULTIPLEDEPTHSEMANTIC Pixel Shader only allows one type of depth semantic to be declared. +SM.PSOUTPUTSEMANTIC Pixel Shader allows output semantics to be SV_Target, SV_Depth, SV_DepthGreaterEqual, SV_DepthLessEqual, SV_Coverage or SV_StencilRef, %0 found. +SM.PSTARGETCOL0 SV_Target packed location must start at column 0. +SM.PSTARGETINDEXMATCHESROW SV_Target semantic index must match packed row location. +SM.RAYSHADERPAYLOADSIZE For shader '%0', %1 size is smaller than argument's allocation size. +SM.RAYSHADERSIGNATURES Ray tracing shader '%0' should not have any shader signatures. +SM.RESOURCERANGEOVERLAP Resource ranges must not overlap +SM.ROVONLYINPS RasterizerOrdered objects are only allowed in 5.0+ pixel shaders. +SM.SAMPLECOUNTONLYON2DMS Only Texture2DMS/2DMSArray could has sample count. +SM.SEMANTIC Semantic must be defined in target shader model +SM.STREAMINDEXRANGE Stream index (%0) must between 0 and %1. +SM.TESSFACTORFORDOMAIN Required TessFactor for domain not found declared anywhere in Patch Constant data. +SM.TESSFACTORSIZEMATCHDOMAIN TessFactor rows, columns (%0, %1) invalid for domain %2. Expected %3 rows and 1 column. +SM.TGSMUNSUPPORTED Thread Group Shared Memory not supported %0. +SM.THREADGROUPCHANNELRANGE Declared Thread Group %0 size %1 outside valid range [%2..%3]. +SM.TRIOUTPUTPRIMITIVEMISMATCH Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain. +SM.UNDEFINEDOUTPUT Not all elements of output %0 were written. +SM.VALIDDOMAIN Invalid Tessellator Domain specified. Must be isoline, tri or quad. +SM.VIEWIDNEEDSSLOT ViewID requires compatible space in pixel shader input signature +SM.WAVESIZEALLZEROWHENUNDEFINED WaveSize Max and Preferred must be 0 when Min is 0 +SM.WAVESIZEEXPECTSONEPARAM WaveSize tag expects exactly 1 parameter. 
+SM.WAVESIZEMAXANDPREFERREDZEROWHENNORANGE WaveSize Max and Preferred must be 0 to encode min==max +SM.WAVESIZEMAXGREATERTHANMIN WaveSize Max must greater than Min +SM.WAVESIZENEEDSCONSTANTOPERANDS WaveSize metadata operands must be constant values. +SM.WAVESIZENEEDSSM66OR67 WaveSize is valid only for Shader Model 6.6 and 6.7. +SM.WAVESIZEONCOMPUTEORNODE WaveSize only allowed on compute or node shaders +SM.WAVESIZEPREFERREDINRANGE WaveSize Preferred must be within Min..Max range +SM.WAVESIZERANGEEXPECTSTHREEPARAMS WaveSize Range tag expects exactly 3 parameters. +SM.WAVESIZERANGENEEDSSM68PLUS WaveSize Range is valid only for Shader Model 6.8 and higher. +SM.WAVESIZETAGDUPLICATE WaveSize or WaveSizeRange tag may only appear once per entry point. +SM.WAVESIZEVALUE WaveSize value must be a power of 2 in range [4..128] +SM.ZEROHSINPUTCONTROLPOINTWITHINPUT When HS input control point count is 0, no input signature should exist. +TYPES.DEFINED Type must be defined based on DXIL primitives +TYPES.I8 I8 can only be used as immediate value for intrinsic or as i8* via bitcast by lifetime intrinsics. +TYPES.INTWIDTH Int type must be of valid width +TYPES.NOMULTIDIM Only one dimension allowed for array type. +TYPES.NOPTRTOPTR Pointers to pointers, or pointers in structures are not allowed. +TYPES.NOVECTOR Vector types must not be present +============================================================= ======================================================================================================================================================================================================================================================================================================== .. VALRULES-RST:END diff --git a/docs/ReleaseNotes.md b/docs/ReleaseNotes.md index 637bd8dae8..6850902a81 100644 --- a/docs/ReleaseNotes.md +++ b/docs/ReleaseNotes.md @@ -19,9 +19,48 @@ The included licenses apply to the following files: ### Upcoming Release -Place release notes for the upcoming release below this line and remove this line upon naming this release. +- Fix regression: [#7510](https://github.com/microsoft/DirectXShaderCompiler/issues/7510) crash when calling `sizeof` on templated type. +- Fix regression: [#7508](https://github.com/microsoft/DirectXShaderCompiler/issues/7508) crash when calling `Load` with `status`. +- Header file `dxcpix.h` was added to the release package. + +### Version 1.8.2505 + +#### Potentially breaking changes - Typed buffers (including ROV buffers) no longer accept types other than vectors and scalars. Any other types will produce descriptive errors. This removes support for appropriately sized matrices and structs. Though it worked in some contexts, code generated from such types was unreliable. + - Load and Store operations have been refactored as a consequence. Behavior should be identical, please file issues if discrepancies are observed. +- The compiler will now always use the internal validator instead of searching for an external DXIL.dll. The (hidden) `-select-validator` option has been removed. + +#### Notable SPIR-V updates + +- Fix unnecessary Int64 requirement when loading Float64 +- Added vk::BufferPointer, see [proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0010-vk-buffer-ref.md) for more details. +- Implement QuadAny and QuadAll (#7266) +- Fix -fvk-invert-y (#7447) + +#### Shader Model 6.9 Preview + +You can now compile shaders to SM 6.9, but this is a preview, so shader hashes will be set to the PREVIEW_BYPASS pattern. 
+SM 6.9 shaders will only work with AgilitySDK 1.717.0-preview, a supported preview driver, and use of experimental shader models in developer mode. +Preview shaders will not be compatible with the SM 6.9 release, or likely even later versions of the SM 6.9 preview. + +SM 6.9 Preview Additions: + +- Long vectors are allowed in HLSL when targeting shader model 6.9. Vectors up to 1024 elements in length can be loaded from/stored to raw buffers and used in elementwise operations. See the [long vector proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0026-hlsl-long-vector-type.md) for more details. +- HLSL Vectors are still limited to a maximum of 4 elements when used in certain contexts: + - entry function inputs/outputs + - parameter, payload, attribute, and node record types for mesh, raytracing, and node shaders + - constant buffers (cbuffer), texture buffers (tbuffer), textures and typed buffers + - Note: some HLSL elementwise intrinsics do not yet support long vectors in this preview +- Native vectors of up to 1024 elements are now present in DXIL. This includes vector llvm instructions, load/store, and various elementwise DXIL operations. This may result in smaller DXIL and potentially other performance improvements. See the [dxil vectors proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0030-dxil-vectors.md) for more details. +- Cooperative Vector operations, a subset of Linear Algebra (LinAlg). See the [cooperative vectors proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0029-cooperative-vector.md) and the [HLSL header based API proposal](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0031-hlsl-vector-matrix-operations.md) for more details. + - New built-in operations are added for multiplying long vectors with a matrix in a ByteAddressBuffer, optionally with accumulation and bias data, as well as outer product and vector accumulate operations. + - An HLSL header shipped with this release provides a more convenient API for using these built-in operations. +- Support for [Opacity Micromaps](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0024-opacity-micromaps.md) in DXR shaders as well as for RayQuery. + - Unlocks DXR performance improvements using triangle sub-divisions for fast hit/miss detection to reduce the need for anyhit invocations. +- Support for [Shader Execution Reordering](https://github.com/microsoft/hlsl-specs/blob/main/proposals/0027-shader-execution-reordering.md) in DXR. + - Introduces `MaybeReorderThread()` to explicitly specify where and how shader execution coherence can be improved. `MaybeReorderThread()` can be used in raygeneration shaders. + - `HitObject` decouples traversal, intersection testing and anyhit shading from closesthit and miss shading for more control and better reordering opportunities. `HitObject` can be used in raygeneration, closesthit and miss shaders. ### Version 1.8.2502 diff --git a/docs/SPIR-V.rst b/docs/SPIR-V.rst index b5e9c05079..a695e5854d 100644 --- a/docs/SPIR-V.rst +++ b/docs/SPIR-V.rst @@ -896,6 +896,13 @@ are translated into SPIR-V ``OpTypeImage``, with parameters: The meanings of the headers in the above table is explained in ``OpTypeImage`` of the SPIR-V spec. +For storage images (e.g. ``RWTexture2D``) and texel buffers (e.g. ``RWBuffer``), +the image format is typically inferred from the data type ``T``. However, the +``-fspv-use-unknown-image-format`` command-line option can be used to change +this behavior. 
When this option is active, the default format for these +resources becomes ``Unknown`` if not otherwise specified by the +``[[vk::image_format]]`` attribute. + Vulkan specific Image Formats ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ @@ -1012,17 +1019,18 @@ right now: 2. DirectX memory layout rules for uniform buffers and storage buffers: they allow packing data on the application side that can be shared with DirectX. They can be enabled by ``-fvk-use-dx-layout``. + + NOTE: This requires ``VK_EXT_scalar_block_layout`` to be enabled on the + application side. 3. Strict OpenGL ``std140`` for uniform buffers and strict OpenGL ``std430`` for storage buffers: they allow packing data on the application side that can be shared with OpenGL. They can be enabled by ``-fvk-use-gl-layout``. 4. Scalar layout rules introduced via `VK_EXT_scalar_block_layout`, which basically aligns all aggregrate types according to their elements' natural alignment. They can be enabled by ``-fvk-use-scalar-layout``. - -To use scalar layout, the application side need to request -``VK_EXT_scalar_block_layout``. This is also true for using DirectX memory -layout since there is no dedicated DirectX layout extension for Vulkan -(at least for now). So we must request something more permissive. + + NOTE: This requires ``VK_EXT_scalar_block_layout`` to be enabled on the + application side. In the above, "vector-relaxed OpenGL ``std140``/``std430``" rules mean OpenGL ``std140``/``std430`` rules with the following modification for vector type @@ -1032,7 +1040,7 @@ alignment: 2. If the above causes an `improper straddle `_, the alignment will be set to 16 bytes. -As an exmaple, for the following HLSL definition: +As an example, for the following HLSL definition: .. code:: hlsl @@ -3967,7 +3975,7 @@ RayQuery Mapping to SPIR-V +---------------------------------------------------+-------------------------------------------------------------------------+ |``.WorldRayDirection`` | ``OpRayQueryGetWorldRayDirectionKHR`` | +---------------------------------------------------+-------------------------------------------------------------------------+ -|``.WorldRayOrigin` | ``OpRayQueryGetWorldRayOriginKHR`` | +|``.WorldRayOrigin`` | ``OpRayQueryGetWorldRayOriginKHR`` | +---------------------------------------------------+-------------------------------------------------------------------------+ Shader Model 6.0+ Wave Intrinsics @@ -4227,7 +4235,7 @@ codegen for Vulkan: - ``-fvk-use-dx-layout``: Uses DirectX layout rules for resources. - ``-fvk-invert-y``: Negates (additively inverts) SV_Position.y before writing to stage output. Used to accommodate the difference between Vulkan's - coordinate system and DirectX's. Only allowed in VS/DS/GS. + coordinate system and DirectX's. Only allowed in VS/DS/GS/MS/Lib. - ``-fvk-use-dx-position-w``: Reciprocates (multiplicatively inverts) SV_Position.w after reading from stage input. 
Used to accommodate the difference between Vulkan DirectX: the w component of SV_Position in PS is diff --git a/external/SPIRV-Headers b/external/SPIRV-Headers index 0e71067798..2a611a970f 160000 --- a/external/SPIRV-Headers +++ b/external/SPIRV-Headers @@ -1 +1 @@ -Subproject commit 0e710677989b4326ac974fd80c5308191ed80965 +Subproject commit 2a611a970fdbc41ac2e3e328802aed9985352dca diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index 4bd1536ed7..33e0256818 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit 4bd1536ed79003a5194a4bd8c9aa2fa17a84c15b +Subproject commit 33e02568181e3312f49a3cf33df470bf96ef293a diff --git a/include/dxc/DXIL/DxilConstants.h b/include/dxc/DXIL/DxilConstants.h index 8c73328fbd..84588a2ff7 100644 --- a/include/dxc/DXIL/DxilConstants.h +++ b/include/dxc/DXIL/DxilConstants.h @@ -154,6 +154,7 @@ const float kMaxMipLodBias = 15.99f; const float kMinMipLodBias = -16.0f; const unsigned kResRetStatusIndex = 4; +const unsigned kVecResRetStatusIndex = 1; /* hctdb_instrhelp.get_max_oload_dims()*/ // OLOAD_DIMS-TEXT:BEGIN @@ -162,24 +163,32 @@ const unsigned kDxilMaxOloadDims = 2; enum class ComponentType : uint32_t { Invalid = 0, - I1, - I16, - U16, - I32, - U32, - I64, - U64, - F16, - F32, - F64, - SNormF16, - UNormF16, - SNormF32, - UNormF32, - SNormF64, - UNormF64, - PackedS8x32, - PackedU8x32, + I1 = 1, + I16 = 2, + U16 = 3, + I32 = 4, + U32 = 5, + I64 = 6, + U64 = 7, + F16 = 8, + F32 = 9, + F64 = 10, + SNormF16 = 11, + UNormF16 = 12, + SNormF32 = 13, + UNormF32 = 14, + SNormF64 = 15, + UNormF64 = 16, + PackedS8x32 = 17, + PackedU8x32 = 18, + + // BEGIN NEW FOR SM 6.9 + U8 = 19, + I8 = 20, + F8_E4M3 = 21, + F8_E5M2 = 22, + // END + LastEntry }; @@ -743,6 +752,19 @@ enum class OpCode : unsigned { CreateHandleForLib = 160, // create resource handle from resource struct for library + // Linear Algebra Operations + MatVecMul = + 305, // Multiplies a MxK dimension matrix and a K sized input vector + MatVecMulAdd = 306, // multiplies a MxK dimension matrix and a K sized input + // vector and adds an M-sized bias vector + OuterProductAccumulate = + 307, // Computes the outer product between column vectors and an MxN + // matrix is accumulated component-wise atomically (with device + // scope) in memory + VectorAccumulate = 308, // Accumulates the components of a vector + // component-wise atomically (with device scope) to + // the corresponding elements of an array in memory + // Mesh shader instructions EmitIndices = 169, // emit a primitive's vertex indices in a mesh shader GetMeshPayload = @@ -1060,7 +1082,7 @@ enum class OpCode : unsigned { NumOpCodes_Dxil_1_7 = 226, NumOpCodes_Dxil_1_8 = 258, - NumOpCodes = 305 // exclusive last value of enumeration + NumOpCodes = 309 // exclusive last value of enumeration }; // OPCODE-ENUM:END @@ -1201,6 +1223,12 @@ enum class OpCodeClass : unsigned { // Library create handle from resource struct (like HL intrinsic) CreateHandleForLib, + // Linear Algebra Operations + MatVecMul, + MatVecMulAdd, + OuterProductAccumulate, + VectorAccumulate, + // Mesh shader instructions EmitIndices, GetMeshPayload, @@ -1385,7 +1413,7 @@ enum class OpCodeClass : unsigned { NumOpClasses_Dxil_1_7 = 153, NumOpClasses_Dxil_1_8 = 174, - NumOpClasses = 190 // exclusive last value of enumeration + NumOpClasses = 194 // exclusive last value of enumeration }; // OPCODECLASS-ENUM:END @@ -1556,6 +1584,38 @@ const unsigned kMSStoreOutputColOpIdx = 3; const unsigned kMSStoreOutputVIdxOpIdx = 4; const unsigned 
kMSStoreOutputValOpIdx = 5; +// HitObject::MakeMiss +const unsigned kHitObjectMakeMiss_RayDescOpIdx = 3; +const unsigned kHitObjectMakeMiss_NumOp = 11; + +// HitObject::TraceRay +const unsigned kHitObjectTraceRay_RayDescOpIdx = 7; +const unsigned kHitObjectTraceRay_PayloadOpIdx = 15; +const unsigned kHitObjectTraceRay_NumOp = 16; + +// MatVec Ops +const unsigned kMatVecMulInputVectorIdx = 1; +const unsigned kMatVecMulIsInputUnsignedIdx = 2; +const unsigned kMatVecMulInputInterpretationIdx = 3; +const unsigned kMatVecMulMatrixBufferIdx = 4; +const unsigned kMatVecMulMatrixOffsetIdx = 5; +const unsigned kMatVecMulMatrixInterpretationIdx = 6; +const unsigned kMatVecMulMatrixMIdx = 7; +const unsigned kMatVecMulMatrixKIdx = 8; +const unsigned kMatVecMulMatrixLayoutIdx = 9; +const unsigned kMatVecMulMatrixTransposeIdx = 10; +const unsigned kMatVecMulMatrixStrideIdx = 11; +const unsigned kMatVecMulIsOutputUnsignedIdx = 12; + +// MatVecAdd +const unsigned kMatVecMulAddBiasInterpretation = 14; +const unsigned kMatVecMulAddIsOutputUnsignedIdx = 15; + +// Outer Product Accumulate +const unsigned kOuterProdAccMatrixInterpretation = 5; +const unsigned kOuterProdAccMatrixLayout = 6; +const unsigned kOuterProdAccMatrixStride = 7; + // TODO: add operand index for all the OpCodeClass. } // namespace OperandIndex @@ -2127,6 +2187,13 @@ extern const char *kHostLayoutTypePrefix; extern const char *kWaveOpsIncludeHelperLanesString; +enum class LinalgMatrixLayout : uint32_t { + RowMajor = 0, + ColumnMajor = 1, + MulOptimal = 2, + OuterProductOptimal = 3, +}; + } // namespace DXIL } // namespace hlsl diff --git a/include/dxc/DXIL/DxilInstructions.h b/include/dxc/DXIL/DxilInstructions.h index a99c5360d4..9a4030fd8e 100644 --- a/include/dxc/DXIL/DxilInstructions.h +++ b/include/dxc/DXIL/DxilInstructions.h @@ -9918,5 +9918,235 @@ struct DxilInst_RawBufferVectorStore { llvm::APInt(32, (uint64_t)val))); } }; + +/// This instruction Multiplies a MxK dimension matrix and a K sized input +/// vector +struct DxilInst_MatVecMul { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_MatVecMul(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::MatVecMul); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (13 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_inputVector = 1, + arg_isInputUnsigned = 2, + arg_inputInterpretation = 3, + arg_matrixBuffer = 4, + arg_matrixOffset = 5, + arg_matrixIntepretation = 6, + arg_matrixM = 7, + arg_matrixK = 8, + arg_matrixLayout = 9, + arg_matrixTranspose = 10, + arg_matrixStride = 11, + arg_isOutputUnsigned = 12, + }; + // Accessors + llvm::Value *get_inputVector() const { return Instr->getOperand(1); } + void set_inputVector(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_isInputUnsigned() const { return Instr->getOperand(2); } + void set_isInputUnsigned(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_inputInterpretation() const { return Instr->getOperand(3); } + void set_inputInterpretation(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_matrixBuffer() const { return Instr->getOperand(4); } + void set_matrixBuffer(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_matrixOffset() const { return 
Instr->getOperand(5); } + void set_matrixOffset(llvm::Value *val) { Instr->setOperand(5, val); } + llvm::Value *get_matrixIntepretation() const { return Instr->getOperand(6); } + void set_matrixIntepretation(llvm::Value *val) { Instr->setOperand(6, val); } + llvm::Value *get_matrixM() const { return Instr->getOperand(7); } + void set_matrixM(llvm::Value *val) { Instr->setOperand(7, val); } + llvm::Value *get_matrixK() const { return Instr->getOperand(8); } + void set_matrixK(llvm::Value *val) { Instr->setOperand(8, val); } + llvm::Value *get_matrixLayout() const { return Instr->getOperand(9); } + void set_matrixLayout(llvm::Value *val) { Instr->setOperand(9, val); } + llvm::Value *get_matrixTranspose() const { return Instr->getOperand(10); } + void set_matrixTranspose(llvm::Value *val) { Instr->setOperand(10, val); } + llvm::Value *get_matrixStride() const { return Instr->getOperand(11); } + void set_matrixStride(llvm::Value *val) { Instr->setOperand(11, val); } + llvm::Value *get_isOutputUnsigned() const { return Instr->getOperand(12); } + void set_isOutputUnsigned(llvm::Value *val) { Instr->setOperand(12, val); } +}; + +/// This instruction multiplies a MxK dimension matrix and a K sized input +/// vector and adds an M-sized bias vector +struct DxilInst_MatVecMulAdd { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_MatVecMulAdd(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::MatVecMulAdd); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (16 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_inputVector = 1, + arg_isInputUnsigned = 2, + arg_inputInterpretation = 3, + arg_matrixBuffer = 4, + arg_matrixOffset = 5, + arg_matrixIntepretation = 6, + arg_matrixM = 7, + arg_matrixK = 8, + arg_matrixLayout = 9, + arg_matrixTranspose = 10, + arg_matrixStride = 11, + arg_biasBuffer = 12, + arg_biasOffset = 13, + arg_biasIntepretation = 14, + arg_isOutputUnsigned = 15, + }; + // Accessors + llvm::Value *get_inputVector() const { return Instr->getOperand(1); } + void set_inputVector(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_isInputUnsigned() const { return Instr->getOperand(2); } + void set_isInputUnsigned(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_inputInterpretation() const { return Instr->getOperand(3); } + void set_inputInterpretation(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_matrixBuffer() const { return Instr->getOperand(4); } + void set_matrixBuffer(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_matrixOffset() const { return Instr->getOperand(5); } + void set_matrixOffset(llvm::Value *val) { Instr->setOperand(5, val); } + llvm::Value *get_matrixIntepretation() const { return Instr->getOperand(6); } + void set_matrixIntepretation(llvm::Value *val) { Instr->setOperand(6, val); } + llvm::Value *get_matrixM() const { return Instr->getOperand(7); } + void set_matrixM(llvm::Value *val) { Instr->setOperand(7, val); } + llvm::Value *get_matrixK() const { return Instr->getOperand(8); } + void set_matrixK(llvm::Value *val) { Instr->setOperand(8, val); } + llvm::Value *get_matrixLayout() const { return Instr->getOperand(9); } + void set_matrixLayout(llvm::Value *val) { 
Instr->setOperand(9, val); } + llvm::Value *get_matrixTranspose() const { return Instr->getOperand(10); } + void set_matrixTranspose(llvm::Value *val) { Instr->setOperand(10, val); } + llvm::Value *get_matrixStride() const { return Instr->getOperand(11); } + void set_matrixStride(llvm::Value *val) { Instr->setOperand(11, val); } + llvm::Value *get_biasBuffer() const { return Instr->getOperand(12); } + void set_biasBuffer(llvm::Value *val) { Instr->setOperand(12, val); } + llvm::Value *get_biasOffset() const { return Instr->getOperand(13); } + void set_biasOffset(llvm::Value *val) { Instr->setOperand(13, val); } + llvm::Value *get_biasIntepretation() const { return Instr->getOperand(14); } + void set_biasIntepretation(llvm::Value *val) { Instr->setOperand(14, val); } + llvm::Value *get_isOutputUnsigned() const { return Instr->getOperand(15); } + void set_isOutputUnsigned(llvm::Value *val) { Instr->setOperand(15, val); } +}; + +/// This instruction Computes the outer product between column vectors and an +/// MxN matrix is accumulated component-wise atomically (with device scope) in +/// memory +struct DxilInst_OuterProductAccumulate { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_OuterProductAccumulate(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst( + Instr, hlsl::OP::OpCode::OuterProductAccumulate); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (8 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_inputVector1 = 1, + arg_inputVector2 = 2, + arg_matrixBuffer = 3, + arg_matrixOffset = 4, + arg_matrixIntepretation = 5, + arg_matrixLayout = 6, + arg_matrixStride = 7, + }; + // Accessors + llvm::Value *get_inputVector1() const { return Instr->getOperand(1); } + void set_inputVector1(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_inputVector2() const { return Instr->getOperand(2); } + void set_inputVector2(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_matrixBuffer() const { return Instr->getOperand(3); } + void set_matrixBuffer(llvm::Value *val) { Instr->setOperand(3, val); } + llvm::Value *get_matrixOffset() const { return Instr->getOperand(4); } + void set_matrixOffset(llvm::Value *val) { Instr->setOperand(4, val); } + llvm::Value *get_matrixIntepretation() const { return Instr->getOperand(5); } + void set_matrixIntepretation(llvm::Value *val) { Instr->setOperand(5, val); } + int32_t get_matrixIntepretation_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(5)) + ->getZExtValue()); + } + void set_matrixIntepretation_val(int32_t val) { + Instr->setOperand(5, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } + llvm::Value *get_matrixLayout() const { return Instr->getOperand(6); } + void set_matrixLayout(llvm::Value *val) { Instr->setOperand(6, val); } + int32_t get_matrixLayout_val() const { + return (int32_t)(llvm::dyn_cast(Instr->getOperand(6)) + ->getZExtValue()); + } + void set_matrixLayout_val(int32_t val) { + Instr->setOperand(6, llvm::Constant::getIntegerValue( + llvm::IntegerType::get(Instr->getContext(), 32), + llvm::APInt(32, (uint64_t)val))); + } + llvm::Value *get_matrixStride() const { return Instr->getOperand(7); } + void set_matrixStride(llvm::Value 
*val) { Instr->setOperand(7, val); } +}; + +/// This instruction Accumulates the components of a vector component-wise +/// atomically (with device scope) to the corresponding elements of an array in +/// memory +struct DxilInst_VectorAccumulate { + llvm::Instruction *Instr; + // Construction and identification + DxilInst_VectorAccumulate(llvm::Instruction *pInstr) : Instr(pInstr) {} + operator bool() const { + return hlsl::OP::IsDxilOpFuncCallInst(Instr, + hlsl::OP::OpCode::VectorAccumulate); + } + // Validation support + bool isAllowed() const { return true; } + bool isArgumentListValid() const { + if (4 != llvm::dyn_cast(Instr)->getNumArgOperands()) + return false; + return true; + } + // Metadata + bool requiresUniformInputs() const { return false; } + // Operand indexes + enum OperandIdx { + arg_inputVector = 1, + arg_arrayBuffer = 2, + arg_arrayOffset = 3, + }; + // Accessors + llvm::Value *get_inputVector() const { return Instr->getOperand(1); } + void set_inputVector(llvm::Value *val) { Instr->setOperand(1, val); } + llvm::Value *get_arrayBuffer() const { return Instr->getOperand(2); } + void set_arrayBuffer(llvm::Value *val) { Instr->setOperand(2, val); } + llvm::Value *get_arrayOffset() const { return Instr->getOperand(3); } + void set_arrayOffset(llvm::Value *val) { Instr->setOperand(3, val); } +}; // INSTR-HELPER:END } // namespace hlsl diff --git a/include/dxc/DxilContainer/DxcContainerBuilder.h b/include/dxc/DxilContainer/DxcContainerBuilder.h index 9a3241525c..e79fec18c8 100644 --- a/include/dxc/DxilContainer/DxcContainerBuilder.h +++ b/include/dxc/DxilContainer/DxcContainerBuilder.h @@ -45,8 +45,7 @@ class DxcContainerBuilder : public IDxcContainerBuilder { return DoBasicQueryInterface(this, riid, ppvObject); } - void Init(const char *warning = nullptr) { - m_warning = warning; + void Init() { m_RequireValidation = false; m_HasPrivateData = false; m_HashFunction = nullptr; @@ -67,7 +66,6 @@ class DxcContainerBuilder : public IDxcContainerBuilder { PartList m_parts; CComPtr m_pContainer; - const char *m_warning; bool m_RequireValidation; bool m_HasPrivateData; // Function to compute hash when valid dxil container is built diff --git a/include/dxc/DxilContainer/RDAT_LibraryTypes.inl b/include/dxc/DxilContainer/RDAT_LibraryTypes.inl index 4b58b406c2..902f2e9652 100644 --- a/include/dxc/DxilContainer/RDAT_LibraryTypes.inl +++ b/include/dxc/DxilContainer/RDAT_LibraryTypes.inl @@ -565,9 +565,13 @@ RDAT_DXIL_ENUM_START(hlsl::DXIL::ComponentType, uint32_t) RDAT_ENUM_VALUE_NODEF(UNormF64) RDAT_ENUM_VALUE_NODEF(PackedS8x32) RDAT_ENUM_VALUE_NODEF(PackedU8x32) + RDAT_ENUM_VALUE_NODEF(U8) + RDAT_ENUM_VALUE_NODEF(I8) + RDAT_ENUM_VALUE_NODEF(F8_E4M3) + RDAT_ENUM_VALUE_NODEF(F8_E5M2) RDAT_ENUM_VALUE_NODEF(LastEntry) #if DEF_RDAT_ENUMS == DEF_RDAT_DUMP_IMPL - static_assert((unsigned)hlsl::DXIL::ComponentType::LastEntry == 19, + static_assert((unsigned)hlsl::DXIL::ComponentType::LastEntry == 23, "otherwise, RDAT_DXIL_ENUM definition needs updating"); #endif RDAT_ENUM_END() diff --git a/include/dxc/HLSL/HLOperations.h b/include/dxc/HLSL/HLOperations.h index a7db8612a6..79cbadc42c 100644 --- a/include/dxc/HLSL/HLOperations.h +++ b/include/dxc/HLSL/HLOperations.h @@ -396,7 +396,12 @@ const unsigned kAnnotateHandleResourceTypeOpIdx = 3; // TraceRay. 
const unsigned kTraceRayRayDescOpIdx = 7; -const unsigned kTraceRayPayLoadOpIdx = 8; +// kTraceRayPayloadPreOpIdx is before flattening the RayDesc +const unsigned kTraceRayPayloadPreOpIdx = 8; +// kTraceRayPayloadOpIdx is after flattening the RayDesc +const unsigned kTraceRayPayloadOpIdx = 11; +const unsigned kTraceRay_PreNumOp = 9; +const unsigned kTraceRay_NumOp = 12; // AllocateRayQuery const unsigned kAllocateRayQueryRayFlagsIdx = 1; @@ -407,6 +412,10 @@ const unsigned kCallShaderPayloadOpIdx = 2; // TraceRayInline. const unsigned kTraceRayInlineRayDescOpIdx = 5; +// kTraceRayInlinePayloadPreOpIdx is before flattening the RayDesc +const unsigned kTraceRayInlinePayloadPreOpIdx = 6; +// kTraceRayInlinePayloadOpIdx is after flattening the RayDesc +const unsigned kTraceRayInlinePayloadOpIdx = 9; // ReportIntersection. const unsigned kReportIntersectionAttributeOpIdx = 3; @@ -435,8 +444,75 @@ const unsigned kAnnotateNodeRecordHandleNodeRecordPropIdx = 2; // HitObject::MakeMiss const unsigned kHitObjectMakeMiss_NumOp = 8; -const unsigned kHitObjectMakeMissRayDescOpIdx = 4; - +const unsigned kHitObjectMakeMiss_RayDescOpIdx = 4; + +// HitObject::TraceRay +const unsigned kHitObjectTraceRay_RayDescOpIdx = 8; +// kHitObjectTraceRay_PayloadPreOpIdx is before flattening the RayDesc +const unsigned kHitObjectTraceRay_PayloadPreOpIdx = 9; +// kHitObjectTraceRay_PayloadOpIdx is after flattening the RayDesc +const unsigned kHitObjectTraceRay_PayloadOpIdx = 12; +const unsigned kHitObjectTraceRay_PreNumOp = 10; +const unsigned kHitObjectTraceRay_NumOp = 13; + +// HitObject::Invoke +const unsigned kHitObjectInvoke_PayloadOpIdx = 2; + +// HitObject::FromRayQuery +const unsigned kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx = 4; +const unsigned kHitObjectFromRayQuery_WithAttrs_NumOp = 5; + +// HitObject::GetAttributes +const unsigned kHitObjectGetAttributes_AttributeOpIdx = 2; + +// Linear Algebra Operations + +// MatVecMul +const unsigned kMatVecMulOutputVectorIdx = 1; +const unsigned kMatVecMulIsOutputUnsignedIdx = 2; +const unsigned kMatVecMulInputVectorIdx = 3; +const unsigned kMatVecMulIsInputUnsignedIdx = 4; +const unsigned kMatVecMulInputInterpretationIdx = 5; +const unsigned kMatVecMulMatrixBufferIdx = 6; +const unsigned kMatVecMulMatrixOffsetIdx = 7; +const unsigned kMatVecMulMatrixInterpretationIdx = 8; +const unsigned kMatVecMulMatrixMIdx = 9; +const unsigned kMatVecMulMatrixKIdx = 10; +const unsigned kMatVecMulMatrixLayoutIdx = 11; +const unsigned kMatVecMulMatrixTransposeIdx = 12; +const unsigned kMatVecMulMatrixStrideIdx = 13; + +// MatVecMulAdd +const unsigned kMatVecMulAddOutputVectorIdx = 1; +const unsigned kMatVecMulAddIsOutputUnsignedIdx = 2; +const unsigned kMatVecMulAddInputVectorIdx = 3; +const unsigned kMatVecMulAddIsInputUnsignedIdx = 4; +const unsigned kMatVecMulAddInputInterpretationIdx = 5; +const unsigned kMatVecMulAddMatrixBufferIdx = 6; +const unsigned kMatVecMulAddMatrixOffsetIdx = 7; +const unsigned kMatVecMulAddMatrixInterpretationIdx = 8; +const unsigned kMatVecMulAddMatrixMIdx = 9; +const unsigned kMatVecMulAddMatrixKIdx = 10; +const unsigned kMatVecMulAddMatrixLayoutIdx = 11; +const unsigned kMatVecMulAddMatrixTransposeIdx = 12; +const unsigned kMatVecMulAddMatrixStrideIdx = 13; +const unsigned kMatVecMulAddBiasBufferIdx = 14; +const unsigned kMatVecMulAddBiasOffsetIdx = 15; +const unsigned kMatVecMulAddBiasInterpretationIdx = 16; + +// OuterProductAccumulate +const unsigned kOuterProdAccInputVec1Idx = 1; +const unsigned kOuterProdAccInputVec2Idx = 2; +const unsigned 
kOuterProdAccMatrixIdx = 3; +const unsigned kOuterProdAccMatrixOffsetIdx = 4; +const unsigned kOuterProdAccMatrixInterpretationIdx = 5; +const unsigned kOuterProdAccMatrixLayoutIdx = 6; +const unsigned kOuterProdAccMatrixStrideIdx = 7; + +// Vector Accumulate +const unsigned kVectorAccInputVecIdx = 1; +const unsigned kVectorAccMatrixIdx = 2; +const unsigned kVectorAccMatrixOffsetIdx = 3; } // namespace HLOperandIndex llvm::Function *GetOrCreateHLFunction(llvm::Module &M, diff --git a/include/dxc/HlslIntrinsicOp.h b/include/dxc/HlslIntrinsicOp.h index d37c27a38e..197bd3e1f5 100644 --- a/include/dxc/HlslIntrinsicOp.h +++ b/include/dxc/HlslIntrinsicOp.h @@ -107,6 +107,10 @@ enum class IntrinsicOp { IOP_WorldToObject = 99, IOP_WorldToObject3x4 = 100, IOP_WorldToObject4x3 = 101, + IOP___builtin_MatVecMul = 390, + IOP___builtin_MatVecMulAdd = 391, + IOP___builtin_OuterProductAccumulate = 392, + IOP___builtin_VectorAccumulate = 393, IOP_abort = 102, IOP_abs = 103, IOP_acos = 104, @@ -396,7 +400,7 @@ enum class IntrinsicOp { IOP_usign = 355, MOP_InterlockedUMax = 356, MOP_InterlockedUMin = 357, - Num_Intrinsics = 390, + Num_Intrinsics = 394, }; inline bool HasUnsignedIntrinsicOpcode(IntrinsicOp opcode) { switch (opcode) { diff --git a/include/dxc/Support/HLSLOptions.h b/include/dxc/Support/HLSLOptions.h index 56e95a1659..31ca3d1c14 100644 --- a/include/dxc/Support/HLSLOptions.h +++ b/include/dxc/Support/HLSLOptions.h @@ -114,13 +114,6 @@ struct RewriterOpts { bool DeclGlobalCB = false; // OPT_rw_decl_global_cb }; -enum class ValidatorSelection : int { - Auto, // Try DXIL.dll; fallback to internal validator - Internal, // Force internal validator (even if DXIL.dll is present) - External, // Use DXIL.dll, failing compilation if not available - Invalid = -1 // Invalid -}; - /// Use this class to capture all options. class DxcOpts { public: @@ -225,8 +218,6 @@ class DxcOpts { bool ResMayAlias = false; // OPT_res_may_alias unsigned long ValVerMajor = UINT_MAX, ValVerMinor = UINT_MAX; // OPT_validator_version - ValidatorSelection SelectValidator = - ValidatorSelection::Auto; // OPT_select_validator unsigned ScanLimit = 0; // OPT_memdep_block_scan_limit bool ForceZeroStoreLifetimes = false; // OPT_force_zero_store_lifetimes bool EnableLifetimeMarkers = false; // OPT_enable_lifetime_markers diff --git a/include/dxc/Support/HLSLOptions.td b/include/dxc/Support/HLSLOptions.td index ea000f4877..4a38e275c3 100644 --- a/include/dxc/Support/HLSLOptions.td +++ b/include/dxc/Support/HLSLOptions.td @@ -317,8 +317,6 @@ def print_before_all : Flag<["-", "/"], "print-before-all">, Group; def print_before : Separate<["-", "/"], "print-before">, Group, Flags<[CoreOption, HelpHidden]>, HelpText<"Print LLVM IR before a specific pass. 
May be specificied multiple times.">; -def select_validator : Separate<["-", "/"], "select-validator">, Group, Flags<[CoreOption, HelpHidden]>, - HelpText<"Select validator: auto: (default) use DXIL.dll if found, otherwise use internal; internal: internal non-signing validator; external: use DXIL.dll if found, otherwise fail compilation.">; def print_after_all : Flag<["-", "/"], "print-after-all">, Group, Flags<[CoreOption, HelpHidden]>, HelpText<"Print LLVM IR after each pass.">; def print_after : Separate<["-", "/"], "print-after">, Group, Flags<[CoreOption, HelpHidden]>, @@ -370,7 +368,7 @@ def fvk_bind_register : MultiArg<["-"], "fvk-bind-register", 4>, MetaVarName<"; def vkbr : MultiArg<["-"], "vkbr", 4>, Flags<[CoreOption, DriverOption]>, Alias; def fvk_invert_y: Flag<["-"], "fvk-invert-y">, Group, Flags<[CoreOption, DriverOption]>, - HelpText<"Negate SV_Position.y before writing to stage output in VS/DS/GS to accommodate Vulkan's coordinate system">; + HelpText<"Negate SV_Position.y before writing to stage output in VS/DS/GS/MS/Lib to accommodate Vulkan's coordinate system">; def fvk_use_dx_position_w: Flag<["-"], "fvk-use-dx-position-w">, Group, Flags<[CoreOption, DriverOption]>, HelpText<"Reciprocate SV_Position.w after reading from stage input in PS to accommodate the difference between Vulkan and DirectX">; def fvk_support_nonzero_base_instance: Flag<["-"], "fvk-support-nonzero-base-instance">, Group, Flags<[CoreOption, DriverOption]>, @@ -405,6 +403,12 @@ def fspv_enable_maximal_reconvergence: Flag<["-"], "fspv-enable-maximal-reconver HelpText<"Enables the MaximallyReconvergesKHR execution mode for this module.">; def fspv_use_vulkan_memory_model: Flag<["-"], "fspv-use-vulkan-memory-model">, Group, Flags<[CoreOption, DriverOption]>, HelpText<"Generates SPIR-V modules that use the Vulkan memory model instead of GLSL450.">; +def fspv_use_unknown_image_format + : Flag<["-"], "fspv-use-unknown-image-format">, + Group, + Flags<[CoreOption, DriverOption]>, + HelpText<"For storage images and texel buffers, sets the default format to 'Unknown' when not specified via the `vk::image_format` attribute. If this option is not used, the format is inferred from the resource's data type.">; + def fvk_auto_shift_bindings: Flag<["-"], "fvk-auto-shift-bindings">, Group, Flags<[CoreOption, DriverOption]>, HelpText<"Apply fvk-*-shift to resources without an explicit register assignment.">; def Wno_vk_ignored_features : Joined<["-"], "Wno-vk-ignored-features">, Group, Flags<[CoreOption, DriverOption, HelpHidden]>, diff --git a/include/dxc/Support/SPIRVOptions.h b/include/dxc/Support/SPIRVOptions.h index 1b88ef4def..352cf6c2ec 100644 --- a/include/dxc/Support/SPIRVOptions.h +++ b/include/dxc/Support/SPIRVOptions.h @@ -71,6 +71,7 @@ struct SpirvCodeGenOptions { bool fixFuncCallArguments; bool enableMaximalReconvergence; bool useVulkanMemoryModel; + bool useUnknownImageFormat; bool IEEEStrict; /// Maximum length in words for the OpString literal containing the shader /// source for DebugSource and DebugSourceContinued. 
If the source code length diff --git a/include/dxc/Test/HlslTestUtils.h b/include/dxc/Test/HlslTestUtils.h index 0e37ccdcff..dd89fda676 100644 --- a/include/dxc/Test/HlslTestUtils.h +++ b/include/dxc/Test/HlslTestUtils.h @@ -10,6 +10,8 @@ /////////////////////////////////////////////////////////////////////////////// // *** THIS FILE CANNOT TAKE ANY LLVM DEPENDENCIES *** // +#ifndef HLSLTESTUTILS_H +#define HLSLTESTUTILS_H #include #include @@ -258,6 +260,29 @@ inline void LogErrorFmt(const wchar_t *fmt, ...) { WEX::Logging::Log::Error(buf.data()); } +inline void LogErrorFmtThrow(const char *fileName, int line, const wchar_t *fmt, + ...) { + va_list args; + va_start(args, fmt); + std::wstring buf(vFormatToWString(fmt, args)); + va_end(args); + + std::wstringstream wss; + wss << L"Error in file: " << fileName << L" at line: " << line << L"\n" + << buf.data() << L"\n" + << buf; + + WEX::Logging::Log::Error(wss.str().c_str()); + + // Throws an exception to abort the test. + VERIFY_FAIL(L"Test error"); +} + +// Macro to pass the file name and line number. Otherwise TAEF prints this file +// and line number. +#define LOG_ERROR_FMT_THROW(fmt, ...) \ + hlsl_test::LogErrorFmtThrow(__FILE__, __LINE__, fmt, __VA_ARGS__) + inline std::wstring GetPathToHlslDataFile(const wchar_t *relative, LPCWSTR paramName = HLSLDATAFILEPARAM, @@ -459,15 +484,17 @@ inline bool GetTestParamUseWARP(bool defaultVal) { #ifdef FP_SUBNORMAL -inline bool isdenorm(float f) { return FP_SUBNORMAL == std::fpclassify(f); } +template inline bool isdenorm(T f) { + return FP_SUBNORMAL == std::fpclassify(f); +} #else -inline bool isdenorm(float f) { - return (std::numeric_limits::denorm_min() <= f && - f < std::numeric_limits::min()) || - (-std::numeric_limits::min() < f && - f <= -std::numeric_limits::denorm_min()); +template inline bool isdenorm(T f) { + return (std::numeric_limits::denorm_min() <= f && + f < std::numeric_limits::min()) || + (-std::numeric_limits::min() < f && + f <= -std::numeric_limits::denorm_min()); } #endif // FP_SUBNORMAL @@ -515,6 +542,44 @@ inline bool isnanFloat16(uint16_t val) { uint16_t ConvertFloat32ToFloat16(float val) throw(); float ConvertFloat16ToFloat32(uint16_t val) throw(); +inline bool CompareDoubleULP( + const double &Src, const double &Ref, int64_t ULPTolerance, + hlsl::DXIL::Float32DenormMode Mode = hlsl::DXIL::Float32DenormMode::Any) { + if (Src == Ref) { + return true; + } + if (std::isnan(Src)) { + return std::isnan(Ref); + } + + if (Mode == hlsl::DXIL::Float32DenormMode::Any) { + // If denorm expected, output can be sign preserved zero. Otherwise output + // should pass the regular ulp testing. + if (isdenorm(Ref) && Src == 0 && std::signbit(Src) == std::signbit(Ref)) + return true; + } + + // For FTZ or Preserve mode, we should get the expected number within + // ULPTolerance for any operations. + int64_t Diff = *((const uint64_t *)&Src) - *((const uint64_t *)&Ref); + + uint64_t AbsoluteDiff = Diff < 0 ? -Diff : Diff; + return AbsoluteDiff <= (uint64_t)ULPTolerance; +} + +inline bool CompareDoubleEpsilon(const double &Src, const double &Ref, + float Epsilon) { + if (Src == Ref) { + return true; + } + if (std::isnan(Src)) { + return std::isnan(Ref); + } + // For FTZ or Preserve mode, we should get the expected number within + // epsilon for any operations. 
+ return fabs(Src - Ref) < Epsilon; +} + inline bool CompareFloatULP( const float &fsrc, const float &fref, int ULPTolerance, hlsl::DXIL::Float32DenormMode mode = hlsl::DXIL::Float32DenormMode::Any) { @@ -566,12 +631,26 @@ inline bool CompareFloatRelativeEpsilon( inline bool CompareHalfULP(const uint16_t &fsrc, const uint16_t &fref, float ULPTolerance) { + // Treat +0 and -0 as equal + if ((fsrc & ~FLOAT16_BIT_SIGN) == 0 && (fref & ~FLOAT16_BIT_SIGN) == 0) + return true; if (fsrc == fref) return true; - if (isnanFloat16(fsrc)) - return isnanFloat16(fref); + + const bool nanRef = isnanFloat16(fref); + const bool nanSrc = isnanFloat16(fsrc); + if (nanRef || nanSrc) + return nanRef && nanSrc; + + // Map to monotonic ordering for correct ULP diff + auto toOrdered = [](uint16_t h) -> int { + return (h & FLOAT16_BIT_SIGN) ? (~h & 0xFFFF) : (h | 0x8000); + }; + // 16-bit floating point numbers must preserve denorms - int diff = fsrc - fref; + int i_fsrc = toOrdered(fsrc); + int i_fref = toOrdered(fref); + int diff = i_fsrc - i_fref; unsigned int uDiff = diff < 0 ? -diff : diff; return uDiff <= (unsigned int)ULPTolerance; } @@ -735,3 +814,5 @@ inline UINT GetByteSizeForFormat(DXGI_FORMAT value) { } } #endif + +#endif // HLSLTESTUTILS_H diff --git a/include/dxc/dxcapi.internal.h b/include/dxc/dxcapi.internal.h index 28bd3e7066..46a485206e 100644 --- a/include/dxc/dxcapi.internal.h +++ b/include/dxc/dxcapi.internal.h @@ -7,9 +7,6 @@ // // // Provides non-public declarations for the DirectX Compiler component. // // // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // -// All rights reserved. // -// // /////////////////////////////////////////////////////////////////////////////// #ifndef __DXC_API_INTERNAL__ @@ -133,11 +130,15 @@ enum LEGAL_INTRINSIC_COMPTYPES { LICOMPTYPE_HIT_OBJECT = 51, LICOMPTYPE_RAY_QUERY = 52, + LICOMPTYPE_LINALG = 53, // f32, partial-precision-f32, f16, + // i32, i16, u32, u16, + // int8_4packed, uint8_4packed + #ifdef ENABLE_SPIRV_CODEGEN - LICOMPTYPE_VK_BUFFER_POINTER = 53, - LICOMPTYPE_COUNT = 54 + LICOMPTYPE_VK_BUFFER_POINTER = 54, + LICOMPTYPE_COUNT = 55 #else - LICOMPTYPE_COUNT = 53 + LICOMPTYPE_COUNT = 54 #endif }; diff --git a/include/llvm/ADT/IntervalMap.h b/include/llvm/ADT/IntervalMap.h index 2a00667227..5bb948727e 100644 --- a/include/llvm/ADT/IntervalMap.h +++ b/include/llvm/ADT/IntervalMap.h @@ -320,7 +320,11 @@ class NodeBase { return Count; } else { // We want to shrink, copy to sib. - unsigned Count = std::min(std::min(unsigned(-Add), Size), N - SSize); + // Count <= INT_MAX: Since Add is an int, unsigned(-Add) <= 2^31, so + // std::min result <= INT_MAX. Meaning its safe to store the result in an + // int to avoid the compiler warning for '-Count' if we were to use an + // unsigned value instead. + int Count = std::min(std::min(unsigned(-Add), Size), N - SSize); transferToLeftSib(Size, Sib, SSize, Count); return -Count; } diff --git a/include/llvm/ADT/StringExtras.h b/include/llvm/ADT/StringExtras.h index 270989b349..684ee0f9dc 100644 --- a/include/llvm/ADT/StringExtras.h +++ b/include/llvm/ADT/StringExtras.h @@ -36,12 +36,12 @@ static inline StringRef toStringRef(bool B) { /// Interpret the given character \p C as a hexadecimal digit and return its /// value. /// -/// If \p C is not a valid hex digit, -1U is returned. +/// If \p C is not a valid hex digit, ~0U is returned. 
static inline unsigned hexDigitValue(char C) { if (C >= '0' && C <= '9') return C-'0'; if (C >= 'a' && C <= 'f') return C-'a'+10U; if (C >= 'A' && C <= 'F') return C-'A'+10U; - return -1U; + return ~0U; } /// utohex_buffer - Emit the specified number into the buffer specified by diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index d4a6371216..ba63d80e94 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -191,12 +191,12 @@ class SDValue { template<> struct DenseMapInfo { static inline SDValue getEmptyKey() { SDValue V; - V.ResNo = -1U; + V.ResNo = ~0U; return V; } static inline SDValue getTombstoneKey() { SDValue V; - V.ResNo = -2U; + V.ResNo = ~1U; return V; } static unsigned getHashValue(const SDValue &Val) { @@ -879,7 +879,7 @@ inline SDValue::SDValue(SDNode *node, unsigned resno) : Node(node), ResNo(resno) { assert((!Node || ResNo < Node->getNumValues()) && "Invalid result number for the given node!"); - assert(ResNo < -2U && "Cannot use result numbers reserved for DenseMaps."); + assert(ResNo < ~1U && "Cannot use result numbers reserved for DenseMaps."); } inline unsigned SDValue::getOpcode() const { diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h index 791f010a88..c34cfab284 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugAranges.h @@ -32,12 +32,13 @@ class DWARFDebugAranges { void construct(); struct Range { - explicit Range(uint64_t LowPC = -1ULL, uint64_t HighPC = -1ULL, - uint32_t CUOffset = -1U) - : LowPC(LowPC), Length(HighPC - LowPC), CUOffset(CUOffset) {} + explicit Range(uint64_t LowPC = std::numeric_limits::max(), + uint64_t HighPC = std::numeric_limits::max(), + uint32_t CUOffset = std::numeric_limits::max()) + : LowPC(LowPC), Length(HighPC - LowPC), CUOffset(CUOffset) {} void setHighPC(uint64_t HighPC) { - if (HighPC == -1ULL || HighPC <= LowPC) + if (HighPC == std::numeric_limits::max() || HighPC <= LowPC) Length = 0; else Length = HighPC - LowPC; @@ -45,7 +46,7 @@ class DWARFDebugAranges { uint64_t HighPC() const { if (Length) return LowPC + Length; - return -1ULL; + return std::numeric_limits::max(); } bool containsAddress(uint64_t Address) const { diff --git a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h index c930bd603d..8eea252b60 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h +++ b/include/llvm/DebugInfo/DWARF/DWARFDebugRangeList.h @@ -49,9 +49,9 @@ class DWARFDebugRangeList { bool isBaseAddressSelectionEntry(uint8_t AddressSize) const { assert(AddressSize == 4 || AddressSize == 8); if (AddressSize == 4) - return StartAddress == -1U; + return StartAddress == ~0U; else - return StartAddress == -1ULL; + return StartAddress == ~0ULL; } }; diff --git a/include/llvm/Support/BlockFrequency.h b/include/llvm/Support/BlockFrequency.h index 4304a253b2..d7d6d741f4 100644 --- a/include/llvm/Support/BlockFrequency.h +++ b/include/llvm/Support/BlockFrequency.h @@ -15,6 +15,7 @@ #define LLVM_SUPPORT_BLOCKFREQUENCY_H #include "llvm/Support/DataTypes.h" +#include namespace llvm { @@ -29,7 +30,9 @@ class BlockFrequency { BlockFrequency(uint64_t Freq = 0) : Frequency(Freq) { } /// \brief Returns the maximum possible frequency, the saturation value. 
- static uint64_t getMaxFrequency() { return -1ULL; } + static uint64_t getMaxFrequency() { + return std::numeric_limits::max(); + } /// \brief Returns the frequency as a fixpoint number scaled by the entry /// frequency. diff --git a/include/llvm/Support/LEB128.h b/include/llvm/Support/LEB128.h index 1324cb82ca..f8a2843412 100644 --- a/include/llvm/Support/LEB128.h +++ b/include/llvm/Support/LEB128.h @@ -103,7 +103,7 @@ inline int64_t decodeSLEB128(const uint8_t *p, unsigned *n = nullptr) { } while (Byte >= 128); // Sign extend negative numbers. if (Byte & 0x40) - Value |= (-1ULL) << Shift; + Value |= (~0ULL) << Shift; if (n) *n = (unsigned)(p - orig_p); return Value; diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index aa0f9ed873..956c334374 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -1117,7 +1117,11 @@ AliasResult BasicAliasAnalysis::aliasGEP( // stripped a gep with negative index ('gep , -1, ...). if (V1Size != MemoryLocation::UnknownSize && V2Size != MemoryLocation::UnknownSize) { - if (-(uint64_t)GEP1BaseOffset < V1Size) + // GEP1BaseOffset is negative in this else block and because we're + // assigning to an unsigned variable, we can make use of + // -I == (~I + 1) to compute the absolute value of GEP1BaseOffset. + const uint64_t GEP1BaseOffsetAbs = (~GEP1BaseOffset + 1ULL); + if (GEP1BaseOffsetAbs < V1Size) return PartialAlias; return NoAlias; } diff --git a/lib/Analysis/ConstantFolding.cpp b/lib/Analysis/ConstantFolding.cpp index 69c9b10b60..0167bdf0a1 100644 --- a/lib/Analysis/ConstantFolding.cpp +++ b/lib/Analysis/ConstantFolding.cpp @@ -187,7 +187,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { // Shift it to the right place, depending on endianness. Src = ConstantExpr::getShl(Src, ConstantInt::get(Src->getType(), ShiftAmt)); - ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; + ShiftAmt += isLittleEndian ? SrcBitSize : (~SrcBitSize + 1U); // Mix it in. Elt = ConstantExpr::getOr(Elt, Src); @@ -213,7 +213,7 @@ static Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { // endianness. Constant *Elt = ConstantExpr::getLShr(Src, ConstantInt::get(Src->getType(), ShiftAmt)); - ShiftAmt += isLittleEndian ? DstBitSize : -DstBitSize; + ShiftAmt += isLittleEndian ? DstBitSize : (~DstBitSize + 1U); // Truncate the element to an integer with the same pointer size and // convert the element back to a pointer using a inttoptr. diff --git a/lib/Analysis/InstructionSimplify.cpp b/lib/Analysis/InstructionSimplify.cpp index 89c7cc7a3e..96c0b3302d 100644 --- a/lib/Analysis/InstructionSimplify.cpp +++ b/lib/Analysis/InstructionSimplify.cpp @@ -4109,7 +4109,7 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { // Shift it to the right place, depending on endianness. Src = ConstantExpr::getShl(Src, ConstantInt::get(Src->getType(), ShiftAmt)); - ShiftAmt += isLittleEndian ? SrcBitSize : -SrcBitSize; + ShiftAmt += isLittleEndian ? SrcBitSize : (~SrcBitSize + 1U); // Mix it in. Elt = ConstantExpr::getOr(Elt, Src); @@ -4144,9 +4144,9 @@ Constant *FoldBitCast(Constant *C, Type *DestTy, const DataLayout &DL) { for (unsigned j = 0; j != Ratio; ++j) { // Shift the piece of the value into the right place, depending on // endianness. - Constant *Elt = ConstantExpr::getLShr(Src, - ConstantInt::get(Src->getType(), ShiftAmt)); - ShiftAmt += isLittleEndian ? 
DstBitSize : -DstBitSize; + Constant *Elt = ConstantExpr::getLShr( + Src, ConstantInt::get(Src->getType(), ShiftAmt)); + ShiftAmt += isLittleEndian ? DstBitSize : (~DstBitSize + 1U); // Truncate the element to an integer with the same pointer size and // convert the element back to a pointer using a inttoptr. diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index d6316dc75b..d855df32dc 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -1179,7 +1179,7 @@ bool MemoryDepChecker::areDepsSafe(DepCandidates &AccessSets, MemAccessInfoSet &CheckDeps, const ValueToValueMap &Strides) { - MaxSafeDepDistBytes = -1U; + MaxSafeDepDistBytes = std::numeric_limits::max(); while (!CheckDeps.empty()) { MemAccessInfo CurAccess = *CheckDeps.begin(); @@ -1677,8 +1677,8 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const ValueToValueMap &Strides) : PtrRtChecking(SE), DepChecker(SE, L), TheLoop(L), SE(SE), DL(DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), - MaxSafeDepDistBytes(-1U), CanVecMem(false), - StoreToLoopInvariantAddress(false) { + MaxSafeDepDistBytes(std::numeric_limits::max()), + CanVecMem(false), StoreToLoopInvariantAddress(false) { if (canAnalyzeLoop()) analyzeLoop(Strides); } diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index 185c291d66..a87128ca26 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2401,7 +2401,7 @@ uint64_t BitcodeReader::decodeSignRotatedValue(uint64_t V) { if ((V & 1) == 0) return V >> 1; if (V != 1) - return -(V >> 1); + return ~(V >> 1) + 1; // There is no such thing as -0 with integers. "-0" really means MININT. return 1ULL << 63; } diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index 0718c81451..f02344ae64 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1360,7 +1360,7 @@ static void emitSignedInt64(SmallVectorImpl &Vals, uint64_t V) { if ((int64_t)V >= 0) Vals.push_back(V << 1); else - Vals.push_back((-V << 1) | 1); + Vals.push_back(((~V + 1) << 1) | 1); } static void WriteConstants(unsigned FirstVal, unsigned LastVal, @@ -1437,7 +1437,7 @@ static void WriteConstants(unsigned FirstVal, unsigned LastVal, continue; } const Constant *C = cast(V); - unsigned Code = -1U; + unsigned Code = ~0U; unsigned AbbrevToUse = 0; if (C->isNullValue()) { Code = bitc::CST_CODE_NULL; diff --git a/lib/DXIL/DxilOperations.cpp b/lib/DXIL/DxilOperations.cpp index f614ba9d14..253121346a 100644 --- a/lib/DXIL/DxilOperations.cpp +++ b/lib/DXIL/DxilOperations.cpp @@ -765,32 +765,32 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = { "unary", Attribute::ReadNone, 1, - {{0x403}}, - {{0x3}}}, // Overloads: hfgetNumParams() <= 1) return nullptr; return FT->getParamType(1); @@ -6291,6 +6382,19 @@ llvm::Type *OP::GetOverloadType(OpCode opCode, llvm::Function *F) { StructType *ST = cast(Ty); return ST->getElementType(0); } + case OpCode::MatVecMul: + case OpCode::MatVecMulAdd: + if (FT->getNumParams() < 2) + return nullptr; + return llvm::StructType::get(Ctx, + {FT->getReturnType(), FT->getParamType(1)}); + + case OpCode::OuterProductAccumulate: + if (FT->getNumParams() < 3) + return nullptr; + return llvm::StructType::get(Ctx, + {FT->getParamType(1), FT->getParamType(2)}); + // OPCODE-OLOAD-TYPES:END default: return Ty; @@ -6334,7 +6438,7 @@ Type *OP::GetFourI32Type() const { return 
m_pFourI32Type; } Type *OP::GetFourI16Type() const { return m_pFourI16Type; } bool OP::IsResRetType(llvm::Type *Ty) { - if (!Ty->isStructTy()) + if (!Ty || !Ty->isStructTy()) return false; for (Type *ResTy : m_pResRetType) { if (Ty == ResTy) diff --git a/lib/DXIL/DxilUtil.cpp b/lib/DXIL/DxilUtil.cpp index 966c2e189c..cc0b509772 100644 --- a/lib/DXIL/DxilUtil.cpp +++ b/lib/DXIL/DxilUtil.cpp @@ -181,11 +181,11 @@ void PrintUnescapedString(StringRef Name, raw_ostream &Out) { if (C == '\\') { C = Name[++i]; unsigned value = hexDigitValue(C); - if (value != -1U) { + if (value != ~0U) { C = (unsigned char)value; unsigned value2 = hexDigitValue(Name[i + 1]); - assert(value2 != -1U && "otherwise, not a two digit hex escape"); - if (value2 != -1U) { + assert(value2 != ~0U && "otherwise, not a two digit hex escape"); + if (value2 != ~0U) { C = (C << 4) + (unsigned char)value2; ++i; } diff --git a/lib/DxcSupport/HLSLOptions.cpp b/lib/DxcSupport/HLSLOptions.cpp index 1ce7d0dfc0..b3eb422eb9 100644 --- a/lib/DxcSupport/HLSLOptions.cpp +++ b/lib/DxcSupport/HLSLOptions.cpp @@ -1033,20 +1033,6 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, opts.ValVerMinor = (unsigned long)minor64; } - llvm::StringRef valSelectStr = Args.getLastArgValue(OPT_select_validator); - if (!valSelectStr.empty()) { - opts.SelectValidator = llvm::StringSwitch(valSelectStr) - .Case("auto", ValidatorSelection::Auto) - .Case("internal", ValidatorSelection::Internal) - .Case("external", ValidatorSelection::External) - .Default(ValidatorSelection::Invalid); - if (opts.SelectValidator == ValidatorSelection::Invalid) { - errors << "Unsupported value '" << valSelectStr - << "for -select-validator option."; - return 1; - } - } - if (opts.IsLibraryProfile() && Minor == 0xF) { if (opts.ValVerMajor != UINT_MAX && opts.ValVerMajor != 0) { errors << "Offline library profile cannot be used with non-zero " @@ -1134,6 +1120,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, Args.hasFlag(OPT_fspv_enable_maximal_reconvergence, OPT_INVALID, false); opts.SpirvOptions.useVulkanMemoryModel = Args.hasFlag(OPT_fspv_use_vulkan_memory_model, OPT_INVALID, false); + opts.SpirvOptions.useUnknownImageFormat = + Args.hasFlag(OPT_fspv_use_unknown_image_format, OPT_INVALID, false); if (!handleVkShiftArgs(Args, OPT_fvk_b_shift, "b", &opts.SpirvOptions.bShift, errors) || diff --git a/lib/DxilContainer/DxcContainerBuilder.cpp b/lib/DxilContainer/DxcContainerBuilder.cpp index 770aa910a4..be182328dd 100644 --- a/lib/DxilContainer/DxcContainerBuilder.cpp +++ b/lib/DxilContainer/DxcContainerBuilder.cpp @@ -146,18 +146,14 @@ DxcContainerBuilder::SerializeContainer(IDxcOperationResult **ppResult) { // Combine existing warnings and errors from validation CComPtr pErrorBlob; CDxcMallocHeapPtr errorHeap(m_pMalloc); - SIZE_T warningLength = m_warning ? strlen(m_warning) : 0; - SIZE_T valErrorLength = + SIZE_T totalErrorLength = pValErrorUtf8 ? 
pValErrorUtf8->GetStringLength() : 0; - SIZE_T totalErrorLength = warningLength + valErrorLength; if (totalErrorLength) { SIZE_T errorSizeInBytes = totalErrorLength + 1; errorHeap.AllocateBytes(errorSizeInBytes); - if (warningLength) - memcpy(errorHeap.m_pData, m_warning, warningLength); - if (valErrorLength) - memcpy(errorHeap.m_pData + warningLength, - pValErrorUtf8->GetStringPointer(), valErrorLength); + + memcpy(errorHeap.m_pData, pValErrorUtf8->GetStringPointer(), + totalErrorLength); errorHeap.m_pData[totalErrorLength] = L'\0'; IFT(hlsl::DxcCreateBlobWithEncodingOnMalloc(errorHeap.m_pData, m_pMalloc, errorSizeInBytes, DXC_CP_UTF8, diff --git a/lib/DxilDia/DxcPixDxilStorage.cpp b/lib/DxilDia/DxcPixDxilStorage.cpp index 79d21303dc..4b06f472e8 100644 --- a/lib/DxilDia/DxcPixDxilStorage.cpp +++ b/lib/DxilDia/DxcPixDxilStorage.cpp @@ -185,7 +185,11 @@ dxil_debug_info::DxcPixDxilScalarStorage::Index(DWORD Index, STDMETHODIMP dxil_debug_info::DxcPixDxilScalarStorage::GetRegisterNumber( DWORD *pRegisterNumber) { const auto &ValueLocationMap = m_pVarInfo->m_ValueLocationMap; - auto RegIt = ValueLocationMap.find(m_OffsetFromStorageStartInBits); + // Bitfields will have been packed into their containing integer type: + DWORD size; + m_pOriginalType->GetSizeInBits(&size); + auto RegIt = + ValueLocationMap.find(m_OffsetFromStorageStartInBits & ~(size - 1)); if (RegIt == ValueLocationMap.end()) { return E_FAIL; diff --git a/lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp b/lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp index babf5b7953..88f696b7fa 100644 --- a/lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp +++ b/lib/DxilPIXPasses/DxilAnnotateWithVirtualRegister.cpp @@ -76,19 +76,29 @@ class DxilAnnotateWithVirtualRegister : public llvm::ModulePass { private: void AnnotateValues(llvm::Instruction *pI); - void AnnotateStore(llvm::Instruction *pI); - void SplitVectorStores(hlsl::OP *HlslOP, llvm::Instruction *pI); + void AnnotateStore(hlsl::OP *HlslOP, llvm::Instruction *pI); + void SplitVectorStores(llvm::Instruction *pI); bool IsAllocaRegisterWrite(llvm::Value *V, llvm::AllocaInst **pAI, llvm::Value **pIdx); void AnnotateAlloca(llvm::AllocaInst *pAlloca); void AnnotateGeneric(llvm::Instruction *pI); void AssignNewDxilRegister(llvm::Instruction *pI); void AssignNewAllocaRegister(llvm::AllocaInst *pAlloca, std::uint32_t C); - + llvm::Value *AddConstIntValues(llvm::Value *l, llvm::Value *r); + llvm::Value *MultiplyConstIntValue(llvm::Value *l, uint32_t r); + llvm::Value *GetStructOffset(llvm::GetElementPtrInst *pGEP, + uint32_t &GEPOperandIndex, + llvm::Type *pElementType); hlsl::DxilModule *m_DM; std::uint32_t m_uVReg; std::unique_ptr m_MST; int m_StartInstruction = 0; + struct RememberedAllocaStores { + llvm::StoreInst *StoreInst; + llvm::Value *Index; + llvm::MDNode *AllocaReg; + }; + std::vector m_RememberedAllocaStores; void Init(llvm::Module &M) { m_DM = &M.GetOrCreateDxilModule(); @@ -129,8 +139,6 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) { m_DM->SetValidatorVersion(1, 4); } - std::uint32_t InstNum = m_StartInstruction; - auto instrumentableFunctions = PIXPassHelpers::GetAllInstrumentableFunctions(*m_DM); @@ -138,7 +146,7 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) { for (auto &block : F->getBasicBlockList()) { for (auto it = block.begin(); it != block.end();) { llvm::Instruction *I = &*(it++); - SplitVectorStores(m_DM->GetOP(), I); + SplitVectorStores(I); } } } @@ -151,17 +159,32 @@ bool 
DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) { } } + // Process all allocas referenced by dbg.declare intrinsics for (auto *F : instrumentableFunctions) { for (auto &block : F->getBasicBlockList()) { - for (llvm::Instruction &I : block.getInstList()) { - AnnotateStore(&I); + for (auto &I : block) { + if (auto *DbgDeclare = llvm::dyn_cast(&I)) { + // The first operand of DbgDeclare is the address (typically an + // AllocaInst) + if (auto *AddrVal = + llvm::dyn_cast(DbgDeclare->getAddress())) { + AnnotateValues(AddrVal); + } + } } } } + for (auto *F : instrumentableFunctions) + for (auto &block : F->getBasicBlockList()) { + for (llvm::Instruction &I : block.getInstList()) { + AnnotateStore(m_DM->GetOP(), &I); + } + } + for (auto *F : instrumentableFunctions) { - int InstructionRangeStart = InstNum; - int InstructionRangeEnd = InstNum; + int InstructionRangeStart = m_StartInstruction; + int InstructionRangeEnd = m_StartInstruction; for (auto &block : F->getBasicBlockList()) { for (llvm::Instruction &I : block.getInstList()) { // If the instruction is part of the debug value instrumentation added @@ -171,8 +194,9 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) { if (PixAllocaReg::FromInst(Alloca, &unused1, &unused2)) continue; if (!llvm::isa(&I)) { - pix_dxil::PixDxilInstNum::AddMD(M.getContext(), &I, InstNum++); - InstructionRangeEnd = InstNum; + pix_dxil::PixDxilInstNum::AddMD(M.getContext(), &I, + m_StartInstruction++); + InstructionRangeEnd = m_StartInstruction; } } } @@ -188,12 +212,17 @@ bool DxilAnnotateWithVirtualRegister::runOnModule(llvm::Module &M) { } } + for (auto const &as : m_RememberedAllocaStores) { + PixAllocaRegWrite::AddMD(m_DM->GetCtx(), as.StoreInst, as.AllocaReg, + as.Index); + } + if (OSOverride != nullptr) { // Print a set of strings of the exemplary form "InstructionCount: // " if (m_DM->GetShaderModel()->GetKind() == hlsl::ShaderModel::Kind::Library) *OSOverride << "\nIsLibrary\n"; - *OSOverride << "\nInstructionCount:" << InstNum << "\n"; + *OSOverride << "\nInstructionCount:" << m_StartInstruction << "\n"; } m_DM = nullptr; @@ -210,7 +239,8 @@ void DxilAnnotateWithVirtualRegister::AnnotateValues(llvm::Instruction *pI) { } } -void DxilAnnotateWithVirtualRegister::AnnotateStore(llvm::Instruction *pI) { +void DxilAnnotateWithVirtualRegister::AnnotateStore(hlsl::OP *HlslOP, + llvm::Instruction *pI) { auto *pSt = llvm::dyn_cast(pI); if (pSt == nullptr) { return; @@ -226,15 +256,47 @@ void DxilAnnotateWithVirtualRegister::AnnotateStore(llvm::Instruction *pI) { if (AllocaReg == nullptr) { return; } + m_RememberedAllocaStores.push_back({pSt, Index, AllocaReg}); +} + +llvm::Value * +DxilAnnotateWithVirtualRegister::MultiplyConstIntValue(llvm::Value *l, + uint32_t r) { + if (r == 1) + return l; + if (auto *lci = llvm::dyn_cast(l)) + return m_DM->GetOP()->GetU32Const(lci->getLimitedValue() * r); + // Should never get here, but if we do, return the left as a reasonable + // default: + return l; +} - PixAllocaRegWrite::AddMD(m_DM->GetCtx(), pSt, AllocaReg, Index); +llvm::Value * +DxilAnnotateWithVirtualRegister::AddConstIntValues(llvm::Value *l, + llvm::Value *r) { + auto *rci = llvm::dyn_cast(r); + if (rci && rci->getLimitedValue() == 0) + return l; + auto *lci = llvm::dyn_cast(l); + if (lci && lci->getLimitedValue() == 0) + return r; + // Both an assert and a check, in case of unexpected circumstances. 
+ DXASSERT(lci != nullptr && rci != nullptr, + "Both sides of add should be constant ints"); + if (lci != nullptr && rci != nullptr) + return m_DM->GetOP()->GetU32Const(lci->getLimitedValue() + + rci->getLimitedValue()); + // In an emergency, return the left argument. It'll be closest to + // the desired value. + return l; } -static uint32_t GetStructOffset(llvm::GetElementPtrInst *pGEP, - uint32_t &GEPOperandIndex, - llvm::Type *pElementType) { +llvm::Value * +DxilAnnotateWithVirtualRegister::GetStructOffset(llvm::GetElementPtrInst *pGEP, + uint32_t &GEPOperandIndex, + llvm::Type *pElementType) { if (IsInstrumentableFundamentalType(pElementType)) { - return 0; + return m_DM->GetOP()->GetU32Const(0); } else if (auto *pArray = llvm::dyn_cast(pElementType)) { // 1D-array example: // @@ -248,18 +310,13 @@ static uint32_t GetStructOffset(llvm::GetElementPtrInst *pGEP, // -The zeroth element in the struct (which is the array) // -The zeroth element in that array - auto *pArrayIndex = - llvm::dyn_cast(pGEP->getOperand(GEPOperandIndex++)); - - if (pArrayIndex == nullptr) { - return 0; - } + auto *pArrayIndex = pGEP->getOperand(GEPOperandIndex++); - uint32_t ArrayIndex = pArrayIndex->getLimitedValue(); auto pArrayElementType = pArray->getArrayElementType(); - uint32_t MemberIndex = ArrayIndex * CountStructMembers(pArrayElementType); - return MemberIndex + - GetStructOffset(pGEP, GEPOperandIndex, pArrayElementType); + auto *MemberIndex = MultiplyConstIntValue( + pArrayIndex, CountStructMembers(pArrayElementType)); + return AddConstIntValues( + MemberIndex, GetStructOffset(pGEP, GEPOperandIndex, pArrayElementType)); } else if (auto *pStruct = llvm::dyn_cast(pElementType)) { DXASSERT(GEPOperandIndex < pGEP->getNumOperands(), "Unexpectedly read too many GetElementPtrInst operands"); @@ -268,7 +325,7 @@ static uint32_t GetStructOffset(llvm::GetElementPtrInst *pGEP, llvm::dyn_cast(pGEP->getOperand(GEPOperandIndex++)); if (pMemberIndex == nullptr) { - return 0; + return m_DM->GetOP()->GetU32Const(0); } uint32_t MemberIndex = pMemberIndex->getLimitedValue(); @@ -278,16 +335,17 @@ static uint32_t GetStructOffset(llvm::GetElementPtrInst *pGEP, MemberOffset += CountStructMembers(pStruct->getElementType(i)); } - return MemberOffset + GetStructOffset(pGEP, GEPOperandIndex, - pStruct->getElementType(MemberIndex)); + return AddConstIntValues( + m_DM->GetOP()->GetU32Const(MemberOffset), + GetStructOffset(pGEP, GEPOperandIndex, + pStruct->getElementType(MemberIndex))); } else { - return 0; + return m_DM->GetOP()->GetU32Const(0); } } bool DxilAnnotateWithVirtualRegister::IsAllocaRegisterWrite( llvm::Value *V, llvm::AllocaInst **pAI, llvm::Value **pIdx) { - llvm::IRBuilder<> B(m_DM->GetCtx()); *pAI = nullptr; *pIdx = nullptr; @@ -366,7 +424,8 @@ bool DxilAnnotateWithVirtualRegister::IsAllocaRegisterWrite( auto offset = GetStructOffset(pGEP, GEPOperandIndex, pStructType); - llvm::Value *IndexValue = B.getInt32(offset + precedingMemberCount); + llvm::Value *IndexValue = AddConstIntValues( + offset, m_DM->GetOP()->GetU32Const(precedingMemberCount)); if (IndexValue != nullptr) { *pAI = Alloca; @@ -383,7 +442,7 @@ bool DxilAnnotateWithVirtualRegister::IsAllocaRegisterWrite( } *pAI = pAlloca; - *pIdx = B.getInt32(0); + *pIdx = m_DM->GetOP()->GetU32Const(0); return true; } @@ -463,12 +522,13 @@ void DxilAnnotateWithVirtualRegister::AssignNewDxilRegister( void DxilAnnotateWithVirtualRegister::AssignNewAllocaRegister( llvm::AllocaInst *pAlloca, std::uint32_t C) { - PixAllocaReg::AddMD(m_DM->GetCtx(), pAlloca, m_uVReg, C); - 
m_uVReg += C; + if (!PixAllocaReg::FromInst(pAlloca, nullptr, nullptr)) { + PixAllocaReg::AddMD(m_DM->GetCtx(), pAlloca, m_uVReg, C); + m_uVReg += C; + } } -void DxilAnnotateWithVirtualRegister::SplitVectorStores(hlsl::OP *HlslOP, - llvm::Instruction *pI) { +void DxilAnnotateWithVirtualRegister::SplitVectorStores(llvm::Instruction *pI) { auto *pSt = llvm::dyn_cast(pI); if (pSt == nullptr) { return; diff --git a/lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp b/lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp index bf25d9f85f..9ddbe876b5 100644 --- a/lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp +++ b/lib/DxilPIXPasses/DxilDbgValueToDbgDeclare.cpp @@ -36,7 +36,7 @@ using namespace PIXPassHelpers; using namespace llvm; -//#define VALUE_TO_DECLARE_LOGGING +// #define VALUE_TO_DECLARE_LOGGING #ifdef VALUE_TO_DECLARE_LOGGING #ifndef PIX_DEBUG_DUMP_HELPER @@ -859,8 +859,8 @@ void DxilDbgValueToDbgDeclare::handleDbgValue(llvm::Module &M, VALUE_TO_DECLARE_LOG("... variable was null too"); } - llvm::Value *V = DbgValue->getValue(); - if (V == nullptr) { + llvm::Value *ValueFromDbgInst = DbgValue->getValue(); + if (ValueFromDbgInst == nullptr) { // The metadata contained a null Value, so we ignore it. This // seems to be a dxcompiler bug. VALUE_TO_DECLARE_LOG("...Null value!"); @@ -873,20 +873,20 @@ void DxilDbgValueToDbgDeclare::handleDbgValue(llvm::Module &M, return; } - if (llvm::isa(V->getType())) { + if (llvm::isa(ValueFromDbgInst->getType())) { // Safeguard: If the type is not a pointer type, then this is // dbg.value directly pointing to a memory location instead of // a value. if (!IsDITypePointer(Ty, EmptyMap)) { // We only know how to handle AllocaInsts for now - if (!isa(V)) { + if (!isa(ValueFromDbgInst)) { VALUE_TO_DECLARE_LOG( "... variable had pointer type, but is not an alloca."); return; } IRBuilder<> B(DbgValue->getNextNode()); - V = B.CreateLoad(V); + ValueFromDbgInst = B.CreateLoad(ValueFromDbgInst); } } @@ -931,7 +931,7 @@ void DxilDbgValueToDbgDeclare::handleDbgValue(llvm::Module &M, } const OffsetInBits InitialOffset = PackedOffsetFromVar; - auto *insertPt = llvm::dyn_cast(V); + auto *insertPt = llvm::dyn_cast(ValueFromDbgInst); if (insertPt != nullptr && !llvm::isa(insertPt)) { insertPt = insertPt->getNextNode(); // Drivers may crash if phi nodes aren't always at the top of a block, @@ -950,7 +950,8 @@ void DxilDbgValueToDbgDeclare::handleDbgValue(llvm::Module &M, // Offset}. InitialOffset is the offset from DbgValue's expression // (i.e., the offset from the Variable's start), and Offset is the // Scalar Value's packed offset from DbgValue's value. 
- for (const ValueAndOffset &VO : SplitValue(V, InitialOffset, B)) { + for (const ValueAndOffset &VO : + SplitValue(ValueFromDbgInst, InitialOffset, B)) { OffsetInBits AlignedOffset; if (!Offsets.GetAlignedOffsetFromPackedOffset(VO.m_PackedOffset, diff --git a/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp b/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp index a7d7e72cb4..4dd43b07cc 100644 --- a/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp +++ b/lib/DxilPIXPasses/DxilDebugInstrumentation.cpp @@ -1356,7 +1356,19 @@ DxilDebugInstrumentation::FindInstrumentableInstructionsInBlock( IndexingToken = "s"; // static indexing, no debug output required } else { IndexingToken = "d"; // dynamic indexing - RegisterOrStaticIndex = std::to_string(IandT->AllocaBase); + int MaxArraySize = 1; + if (auto *Store = dyn_cast<StoreInst>(&Inst)) { + if (auto *GEP = + dyn_cast<GetElementPtrInst>(Store->getPointerOperand())) { + if (auto *Alloca = + dyn_cast<AllocaInst>(GEP->getPointerOperand())) { + MaxArraySize = + Alloca->getAllocatedType()->getArrayNumElements(); + } + } + } + RegisterOrStaticIndex = std::to_string(IandT->AllocaBase) + "-" + + std::to_string(MaxArraySize); DebugOutputForThisInstruction.ValueToWriteToDebugMemory = IandT->AllocaWriteIndex; } @@ -1374,7 +1386,8 @@ DxilDebugInstrumentation::FindInstrumentableInstructionsInBlock( *OSOverride << "," << *RegisterOrStaticIndex; } if (IandT->ConstantAllocaStoreValue) { - *OSOverride << "," << std::to_string(*IandT->ConstantAllocaStoreValue); + uint64_t value = IandT->ConstantAllocaStoreValue.value(); + *OSOverride << "," << std::to_string(value); } *OSOverride << ";"; if (DebugOutputForThisInstruction.ValueToWriteToDebugMemory) diff --git a/lib/DxilPIXPasses/DxilPIXVirtualRegisters.cpp b/lib/DxilPIXPasses/DxilPIXVirtualRegisters.cpp index f68e2082bc..a60f6a77a7 100644 --- a/lib/DxilPIXPasses/DxilPIXVirtualRegisters.cpp +++ b/lib/DxilPIXPasses/DxilPIXVirtualRegisters.cpp @@ -124,8 +124,10 @@ static bool ParsePixAllocaReg(llvm::MDNode *MD, std::uint32_t *RegNum, return false; } - *RegNum = mdRegNum->getLimitedValue(); - *Count = mdCount->getLimitedValue(); + if (RegNum != nullptr) + *RegNum = mdRegNum->getLimitedValue(); + if (Count != nullptr) + *Count = mdCount->getLimitedValue(); return true; } @@ -144,8 +146,10 @@ void pix_dxil::PixAllocaReg::AddMD(llvm::LLVMContext &Ctx, bool pix_dxil::PixAllocaReg::FromInst(llvm::AllocaInst const *pAlloca, std::uint32_t *pRegBase, std::uint32_t *pRegSize) { - *pRegBase = 0; - *pRegSize = 0; + if (pRegBase != nullptr) + *pRegBase = 0; + if (pRegSize != nullptr) + *pRegSize = 0; auto *mdNodes = pAlloca->getMetadata(MDName); if (mdNodes == nullptr) { diff --git a/lib/DxilValidation/DxilValidation.cpp b/lib/DxilValidation/DxilValidation.cpp index 00a6b9ae14..9587897e22 100644 --- a/lib/DxilValidation/DxilValidation.cpp +++ b/lib/DxilValidation/DxilValidation.cpp @@ -165,7 +165,8 @@ ValidateSignatureAccess(Instruction *I, DxilSignature &Sig, Value *SigId, static DxilResourceProperties GetResourceFromHandle(Value *Handle, ValidationContext &ValCtx) { - if (!isa<CallInst>(Handle)) { + CallInst *HandleCall = dyn_cast<CallInst>(Handle); + if (!HandleCall) { if (Instruction *I = dyn_cast<Instruction>(Handle)) ValCtx.EmitInstrError(I, ValidationRule::InstrHandleNotFromCreateHandle); else @@ -175,10 +176,13 @@ static DxilResourceProperties GetResourceFromHandle(Value *Handle, } DxilResourceProperties RP = ValCtx.GetResourceFromVal(Handle); - if (RP.getResourceClass() == DXIL::ResourceClass::Invalid) { + if (RP.getResourceClass() == DXIL::ResourceClass::Invalid) ValCtx.EmitInstrError(cast<Instruction>(Handle),
ValidationRule::InstrHandleNotFromCreateHandle); - } + if (RP.Basic.IsReorderCoherent && + !ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) + ValCtx.EmitInstrError(HandleCall, + ValidationRule::InstrReorderCoherentRequiresSM69); return RP; } @@ -970,6 +974,293 @@ static void ValidateImmOperandForMathDxilOp(CallInst *CI, DXIL::OpCode Opcode, } } +static bool CheckLinalgInterpretation(uint32_t Input, bool InRegister) { + using CT = DXIL::ComponentType; + switch (static_cast<CT>(Input)) { + case CT::I16: + case CT::U16: + case CT::I32: + case CT::U32: + case CT::F16: + case CT::F32: + case CT::U8: + case CT::I8: + case CT::F8_E4M3: + case CT::F8_E5M2: + return true; + case CT::PackedS8x32: + case CT::PackedU8x32: + return InRegister; + default: + return false; + } +} + +static bool CheckMatrixLayoutForMatVecMulOps(unsigned Layout) { + return Layout <= + static_cast<unsigned>(DXIL::LinalgMatrixLayout::OuterProductOptimal); +} + +std::string GetMatrixLayoutStr(unsigned Layout) { + switch (static_cast<DXIL::LinalgMatrixLayout>(Layout)) { + case DXIL::LinalgMatrixLayout::RowMajor: + return "RowMajor"; + case DXIL::LinalgMatrixLayout::ColumnMajor: + return "ColumnMajor"; + case DXIL::LinalgMatrixLayout::MulOptimal: + return "MulOptimal"; + case DXIL::LinalgMatrixLayout::OuterProductOptimal: + return "OuterProductOptimal"; + default: + DXASSERT_NOMSG(false); + return "Invalid"; + } +} + +static bool CheckTransposeForMatrixLayout(unsigned Layout, bool Transposed) { + switch (static_cast<DXIL::LinalgMatrixLayout>(Layout)) { + case DXIL::LinalgMatrixLayout::RowMajor: + case DXIL::LinalgMatrixLayout::ColumnMajor: + return !Transposed; + + default: + return true; + } +} + +static bool CheckUnsignedFlag(Type *VecTy, bool IsUnsigned) { + Type *ElemTy = VecTy->getScalarType(); + if (ElemTy->isFloatingPointTy()) + return !IsUnsigned; + + return true; +} + +static Value *GetMatVecOpIsOutputUnsigned(CallInst *CI, DXIL::OpCode OpCode) { + switch (OpCode) { + case DXIL::OpCode::MatVecMul: + return CI->getOperand(DXIL::OperandIndex::kMatVecMulIsOutputUnsignedIdx); + case DXIL::OpCode::MatVecMulAdd: + return CI->getOperand(DXIL::OperandIndex::kMatVecMulAddIsOutputUnsignedIdx); + + default: + DXASSERT_NOMSG(false); + return nullptr; + } +}
+static void ValidateImmOperandsForMatVecOps(CallInst *CI, DXIL::OpCode OpCode, + ValidationContext &ValCtx) { + + llvm::Value *IsInputUnsigned = + CI->getOperand(DXIL::OperandIndex::kMatVecMulIsInputUnsignedIdx); + ConstantInt *IsInputUnsignedConst = + dyn_cast<ConstantInt>(IsInputUnsigned); + if (!IsInputUnsignedConst) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrMatVecOpIsUnsignedFlagsAreConst, + {"IsInputUnsigned"}); + return; + } + + llvm::Value *IsOutputUnsigned = GetMatVecOpIsOutputUnsigned(CI, OpCode); + ConstantInt *IsOutputUnsignedConst = + dyn_cast<ConstantInt>(IsOutputUnsigned); + if (!IsOutputUnsignedConst) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrMatVecOpIsUnsignedFlagsAreConst, + {"IsOutputUnsigned"}); + return; + } + + llvm::Value *InputInterpretation = + CI->getOperand(DXIL::OperandIndex::kMatVecMulInputInterpretationIdx); + ConstantInt *II = dyn_cast<ConstantInt>(InputInterpretation); + if (!II) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInterpretationParamAreConst, + {"InputInterpretation"}); + return; + } + uint64_t IIValue = II->getLimitedValue(); + if (!CheckLinalgInterpretation(IIValue, true)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInvalidRegisterInterpValue, + {std::to_string(IIValue), "Input"}); + return; + } + + llvm::Value *MatrixInterpretation = + CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixInterpretationIdx); + ConstantInt *MI = dyn_cast<ConstantInt>(MatrixInterpretation); + if (!MI) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInterpretationParamAreConst, + {"MatrixInterpretation"}); + return; + } + uint64_t MIValue = MI->getLimitedValue(); + if (!CheckLinalgInterpretation(MIValue, false)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInvalidMemoryInterpValue, + {std::to_string(MIValue), "Matrix"}); + return; + } + + llvm::Value *MatrixM = + CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixMIdx); + if (!llvm::isa<ConstantInt>(MatrixM)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst, + {"Matrix M dimension"}); + return; + } + + llvm::Value *MatrixK = + CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixKIdx); + if (!llvm::isa<ConstantInt>(MatrixK)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst, + {"Matrix K dimension"}); + return; + } + + llvm::Value *MatrixLayout = + CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixLayoutIdx); + + ConstantInt *MatrixLayoutConst = dyn_cast<ConstantInt>(MatrixLayout); + if (!MatrixLayoutConst) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst, + {"Matrix Layout"}); + return; + } + uint64_t MLValue = MatrixLayoutConst->getLimitedValue(); + if (!CheckMatrixLayoutForMatVecMulOps(MLValue)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInvalidMatrixLayoutValueForMatVecOps, + {std::to_string(MLValue), + std::to_string( + static_cast<unsigned>(DXIL::LinalgMatrixLayout::RowMajor)), + std::to_string(static_cast<unsigned>( + DXIL::LinalgMatrixLayout::OuterProductOptimal))}); + return; + } + + llvm::Value *MatrixTranspose = + CI->getOperand(DXIL::OperandIndex::kMatVecMulMatrixTransposeIdx); + ConstantInt *MatrixTransposeConst = dyn_cast<ConstantInt>(MatrixTranspose); + if (!MatrixTransposeConst) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst, + {"MatrixTranspose"}); + return; + } + + if (!CheckTransposeForMatrixLayout(MLValue, + MatrixTransposeConst->getLimitedValue())) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgMatrixLayoutNotTransposable, + {GetMatrixLayoutStr(MLValue)}); + return; + } + + llvm::Value *InputVector = + CI->getOperand(DXIL::OperandIndex::kMatVecMulInputVectorIdx); + if (!CheckUnsignedFlag(InputVector->getType(), + IsInputUnsignedConst->getLimitedValue())) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgNotAnUnsignedType, {"Input"}); + return; + } + + if (!CheckUnsignedFlag(CI->getType(), + IsOutputUnsignedConst->getLimitedValue())) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgNotAnUnsignedType, {"Output"}); + return; + } + + switch (OpCode) { + case DXIL::OpCode::MatVecMulAdd: { + llvm::Value *BiasInterpretation = + CI->getOperand(DXIL::OperandIndex::kMatVecMulAddBiasInterpretation); + ConstantInt *BI = cast<ConstantInt>(BiasInterpretation); + if (!BI) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInterpretationParamAreConst, + {"BiasInterpretation"}); + return; + } + uint64_t BIValue = BI->getLimitedValue(); + if (!CheckLinalgInterpretation(BIValue, false)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInvalidMemoryInterpValue, + {std::to_string(BIValue), "Bias vector"}); + return; + } + } break; + default: + break; + } +}
+static void ValidateImmOperandsForOuterProdAcc(CallInst *CI, + ValidationContext &ValCtx) { + + llvm::Value *MatrixInterpretation = + CI->getOperand(DXIL::OperandIndex::kOuterProdAccMatrixInterpretation); + ConstantInt *MI = cast<ConstantInt>(MatrixInterpretation); + if (!MI) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInterpretationParamAreConst, + {"MatrixInterpretation"}); + return; + } + uint64_t MIValue = MI->getLimitedValue(); + if (!CheckLinalgInterpretation(MIValue, false)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgInvalidMemoryInterpValue, + {std::to_string(MIValue), "Matrix"}); + return; + } + + llvm::Value *MatrixLayout = + CI->getOperand(DXIL::OperandIndex::kOuterProdAccMatrixLayout); + if (!llvm::isa<ConstantInt>(MatrixLayout)) { + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrLinalgMatrixShapeParamsAreConst, + {"MatrixLayout"}); + return; + } + ConstantInt *ML = cast<ConstantInt>(MatrixLayout); + uint64_t MLValue = ML->getLimitedValue(); + if (MLValue != + static_cast<unsigned>(DXIL::LinalgMatrixLayout::OuterProductOptimal)) + ValCtx.EmitInstrFormatError( + CI, + ValidationRule:: + InstrLinalgInvalidMatrixLayoutValueForOuterProductAccumulate, + {GetMatrixLayoutStr(MLValue), + GetMatrixLayoutStr(static_cast<unsigned>( + DXIL::LinalgMatrixLayout::OuterProductOptimal))}); + + llvm::Value *MatrixStride = + CI->getOperand(DXIL::OperandIndex::kOuterProdAccMatrixStride); + if (!llvm::isa<ConstantInt>(MatrixStride)) { + ValCtx.EmitInstrError( + CI, ValidationRule::InstrLinalgMatrixStrideZeroForOptimalLayouts); + return; + } + ConstantInt *MS = cast<ConstantInt>(MatrixStride); + uint64_t MSValue = MS->getLimitedValue(); + if (MSValue != 0) { + ValCtx.EmitInstrError( + CI, ValidationRule::InstrLinalgMatrixStrideZeroForOptimalLayouts); + return; + } +} + // Validate the type-defined mask compared to the store value mask which // indicates which parts were defined returns true if caller should continue // validation @@ -1282,9 +1573,15 @@ static void ValidateResourceDxilOp(CallInst *CI, DXIL::OpCode Opcode, ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } else { Value *V = EVI->getOperand(0); + StructType *StrTy = dyn_cast<StructType>(V->getType()); + unsigned ExtractIndex = EVI->getIndices()[0]; + // Ensure parameter is a single value that is extracted from the correct + // ResRet struct location.
bool IsLegal = EVI->getNumIndices() == 1 && - EVI->getIndices()[0] == DXIL::kResRetStatusIndex && - ValCtx.DxilMod.GetOP()->IsResRetType(V->getType()); + (ExtractIndex == DXIL::kResRetStatusIndex || + ExtractIndex == DXIL::kVecResRetStatusIndex) && + ValCtx.DxilMod.GetOP()->IsResRetType(StrTy) && + ExtractIndex == StrTy->getNumElements() - 1; if (!IsLegal) { ValCtx.EmitInstrError(CI, ValidationRule::InstrCheckAccessFullyMapped); } @@ -1644,6 +1941,46 @@ static unsigned getSemanticFlagValidMask(const ShaderModel *pSM) { return static_cast(hlsl::DXIL::BarrierSemanticFlag::ValidMask); } +StringRef GetOpCodeName(DXIL::OpCode OpCode) { + switch (OpCode) { + default: + DXASSERT(false, "Unexpected op code"); + return ""; + case DXIL::OpCode::HitObject_ObjectRayOrigin: + return "HitObject_ObjectRayOrigin"; + case DXIL::OpCode::HitObject_WorldRayDirection: + return "HitObject_WorldRayDirection"; + case DXIL::OpCode::HitObject_WorldRayOrigin: + return "HitObject_WorldRayOrigin"; + case DXIL::OpCode::HitObject_ObjectRayDirection: + return "HitObject_ObjectRayDirection"; + case DXIL::OpCode::HitObject_WorldToObject3x4: + return "HitObject_WorldToObject3x4"; + case DXIL::OpCode::HitObject_ObjectToWorld3x4: + return "HitObject_ObjectToWorld3x4"; + } +} + +static void ValidateConstantRangeUnsigned(Value *Val, StringRef Name, + uint64_t LowerBound, + uint64_t UpperBound, CallInst *CI, + DXIL::OpCode OpCode, + ValidationContext &ValCtx) { + ConstantInt *C = dyn_cast(Val); + if (!C) { + ValCtx.EmitInstrFormatError(CI, ValidationRule::InstrOpConst, + {Name, GetOpCodeName(OpCode)}); + return; + } + if (C->uge(UpperBound + 1U) || !C->uge(LowerBound)) { + std::string Range = + std::to_string(LowerBound) + "~" + std::to_string(UpperBound); + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrOperandRange, + {Name, Range, C->getValue().toString(10, false)}); + } +} + static void ValidateDxilOperationCallInProfile(CallInst *CI, DXIL::OpCode Opcode, const ShaderModel *pSM, @@ -1909,7 +2246,109 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, ValCtx.EmitInstrError( CI, ValidationRule::InstrMayReorderThreadUndefCoherenceHintParam); } break; + case DXIL::OpCode::HitObject_MakeMiss: { + DxilInst_HitObject_MakeMiss MakeMiss(CI); + if (isa(MakeMiss.get_RayFlags()) || + isa(MakeMiss.get_MissShaderIndex())) + ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); + } break; + case DXIL::OpCode::HitObject_LoadLocalRootTableConstant: { + Value *HitObject = CI->getArgOperand(1); + if (isa(HitObject)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + Value *Offset = CI->getArgOperand(2); + if (isa(Offset)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); + if (ConstantInt *COffset = dyn_cast(Offset)) { + if (COffset->getLimitedValue() % 4 != 0) + ValCtx.EmitInstrFormatError( + CI, ValidationRule::InstrParamMultiple, + {"offset", "4", COffset->getValue().toString(10, false)}); + } + break; + } + case DXIL::OpCode::HitObject_SetShaderTableIndex: { + Value *HitObject = CI->getArgOperand(1); + if (isa(HitObject)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + Value *RecordIndex = CI->getArgOperand(2); + if (isa(RecordIndex)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); + break; + } + + // Shader Execution Reordering - scalar getters + case DXIL::OpCode::HitObject_GeometryIndex: + case DXIL::OpCode::HitObject_HitKind: + case DXIL::OpCode::HitObject_InstanceID: + case 
DXIL::OpCode::HitObject_InstanceIndex: + case DXIL::OpCode::HitObject_IsHit: + case DXIL::OpCode::HitObject_IsMiss: + case DXIL::OpCode::HitObject_IsNop: + case DXIL::OpCode::HitObject_PrimitiveIndex: + case DXIL::OpCode::HitObject_RayFlags: + case DXIL::OpCode::HitObject_RayTCurrent: + case DXIL::OpCode::HitObject_RayTMin: + case DXIL::OpCode::HitObject_ShaderTableIndex: { + Value *HitObject = CI->getArgOperand(1); + if (isa(HitObject)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + break; + } + + // Shader Execution Reordering - vector getters + case DXIL::OpCode::HitObject_ObjectRayDirection: + case DXIL::OpCode::HitObject_ObjectRayOrigin: + case DXIL::OpCode::HitObject_WorldRayDirection: + case DXIL::OpCode::HitObject_WorldRayOrigin: { + Value *HitObject = CI->getArgOperand(1); + if (isa(HitObject)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + Value *Col = CI->getArgOperand(2); + ValidateConstantRangeUnsigned(Col, "component", 0, 2, CI, Opcode, ValCtx); + break; + } + + // Shader Execution Reordering - matrix getters + case DXIL::OpCode::HitObject_WorldToObject3x4: + case DXIL::OpCode::HitObject_ObjectToWorld3x4: { + Value *HitObject = CI->getArgOperand(1); + if (isa(HitObject)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + Value *Row = CI->getArgOperand(2); + ValidateConstantRangeUnsigned(Row, "row", 0, 2, CI, Opcode, ValCtx); + Value *Col = CI->getArgOperand(3); + ValidateConstantRangeUnsigned(Col, "column", 0, 3, CI, Opcode, ValCtx); + break; + } + + // Shader Execution Reordering - from ray query + case DXIL::OpCode::HitObject_FromRayQuery: + case DXIL::OpCode::HitObject_FromRayQueryWithAttrs: { + for (unsigned i = 1; i < CI->getNumOperands(); ++i) { + Value *Arg = CI->getArgOperand(i); + if (isa(Arg)) + ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); + } + break; + } + + case DXIL::OpCode::HitObject_Invoke: { + if (isa(CI->getArgOperand(1))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrUndefHitObject); + if (isa(CI->getArgOperand(2))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); + } break; + case DXIL::OpCode::HitObject_TraceRay: { + Value *Hdl = CI->getArgOperand( + DxilInst_HitObject_TraceRay::arg_accelerationStructure); + ValidateASHandle(CI, Hdl, ValCtx); + for (unsigned ArgIdx = 2; ArgIdx < CI->getNumArgOperands(); ++ArgIdx) + if (isa(CI->getArgOperand(ArgIdx))) + ValCtx.EmitInstrError(CI, ValidationRule::InstrNoReadingUninitialized); + DxilInst_HitObject_TraceRay HOTraceRay(CI); + } break; case DXIL::OpCode::AtomicBinOp: case DXIL::OpCode::AtomicCompareExchange: { Type *pOverloadType = OP::GetOverloadType(Opcode, CI->getCalledFunction()); @@ -1994,6 +2433,16 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI, GetLaunchTypeStr(NodeLaunchType)}); break; + case DXIL::OpCode::MatVecMul: + case DXIL::OpCode::MatVecMulAdd: + ValidateImmOperandsForMatVecOps(CI, Opcode, ValCtx); + break; + case DXIL::OpCode::OuterProductAccumulate: + ValidateImmOperandsForOuterProdAcc(CI, ValCtx); + break; + case DXIL::OpCode::VectorAccumulate: + + break; default: // TODO: make sure every Opcode is checked. @@ -2212,6 +2661,9 @@ static bool ValidateType(Type *Ty, ValidationContext &ValCtx, if (ValCtx.HandleTy == Ty) return true; hlsl::OP *HlslOP = ValCtx.DxilMod.GetOP(); + // Allow HitObject type. 
+ if (ST == HlslOP->GetHitObjectType()) + return true; if (IsDxilBuiltinStructType(ST, HlslOP)) { ValCtx.EmitTypeError(Ty, ValidationRule::InstrDxilStructUser); Result = false; @@ -3766,6 +4218,9 @@ static void ValidateResourceOverlap( static void ValidateResource(hlsl::DxilResource &Res, ValidationContext &ValCtx) { + if (Res.IsReorderCoherent() && !ValCtx.DxilMod.GetShaderModel()->IsSM69Plus()) + ValCtx.EmitResourceError(&Res, + ValidationRule::InstrReorderCoherentRequiresSM69); switch (Res.GetKind()) { case DXIL::ResourceKind::RawBuffer: case DXIL::ResourceKind::TypedBuffer: @@ -3997,10 +4452,13 @@ static void ValidateResources(ValidationContext &ValCtx) { ValCtx.EmitResourceError(Uav.get(), ValidationRule::SmCounterOnlyOnStructBuf); } - if (Uav->HasCounter() && Uav->IsGloballyCoherent()) - ValCtx.EmitResourceFormatError(Uav.get(), - ValidationRule::MetaGlcNotOnAppendConsume, - {ValCtx.GetResourceName(Uav.get())}); + const bool UavIsCoherent = + Uav->IsGloballyCoherent() || Uav->IsReorderCoherent(); + if (Uav->HasCounter() && UavIsCoherent) { + StringRef Prefix = Uav->IsGloballyCoherent() ? "globally" : "reorder"; + ValCtx.EmitResourceFormatError( + Uav.get(), ValidationRule::MetaCoherenceNotOnAppendConsume, {Prefix}); + } ValidateResource(*Uav, ValCtx); ValidateResourceOverlap(*Uav, UavAllocator, ValCtx); diff --git a/lib/HLSL/DxilCondenseResources.cpp b/lib/HLSL/DxilCondenseResources.cpp index 529c203bdc..09dd9cea64 100644 --- a/lib/HLSL/DxilCondenseResources.cpp +++ b/lib/HLSL/DxilCondenseResources.cpp @@ -655,7 +655,7 @@ class ResourceUseErrors { public: ResourceUseErrors() : m_bErrorsReported(false) {} - enum ErrorCode { + enum ErrorCode : unsigned int { // Collision between use of one resource GV and another. // All uses must be guaranteed to resolve to only one GV. // Additionally, when writing resource to alloca, all uses diff --git a/lib/HLSL/HLOperationLower.cpp b/lib/HLSL/HLOperationLower.cpp index be45021e41..3c062475af 100644 --- a/lib/HLSL/HLOperationLower.cpp +++ b/lib/HLSL/HLOperationLower.cpp @@ -7,9 +7,6 @@ // // // Lower functions to lower HL operations to DXIL operations. // // // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // -// All rights reserved. // -// // /////////////////////////////////////////////////////////////////////////////// #include "dxc/DXIL/DxilConstants.h" @@ -19,6 +16,8 @@ #include #include +#include "dxc/DXIL/DxilConstants.h" +#include "dxc/DXIL/DxilInstructions.h" #include "dxc/DXIL/DxilModule.h" #include "dxc/DXIL/DxilOperations.h" #include "dxc/DXIL/DxilResourceProperties.h" @@ -3064,10 +3063,10 @@ static Value *ScalarizeResRet(Type *RetTy, Value *ResRet, } void UpdateStatus(Value *ResRet, Value *status, IRBuilder<> &Builder, - hlsl::OP *hlslOp) { + hlsl::OP *hlslOp, + unsigned StatusIndex = DXIL::kResRetStatusIndex) { if (status && !isa(status)) { - Value *statusVal = - Builder.CreateExtractValue(ResRet, DXIL::kResRetStatusIndex); + Value *statusVal = Builder.CreateExtractValue(ResRet, StatusIndex); Value *checkAccessOp = hlslOp->GetI32Const( static_cast(DXIL::OpCode::CheckAccessFullyMapped)); Function *checkAccessFn = hlslOp->GetOpFunc( @@ -4029,9 +4028,9 @@ struct ResLoadHelper { // Used for some subscript operators that feed the generic HL call inst // into a load op and by the matrixload call instruction. 
ResLoadHelper(Instruction *Inst, DxilResource::Kind RK, Value *h, Value *idx, - Value *Offset, Value *mip = nullptr) + Value *Offset, Value *status = nullptr, Value *mip = nullptr) : intrinsicOpCode(IntrinsicOp::Num_Intrinsics), handle(h), retVal(Inst), - addr(idx), offset(Offset), status(nullptr), mipLevel(mip) { + addr(idx), offset(Offset), status(status), mipLevel(mip) { opcode = LoadOpFromResKind(RK); Type *Ty = Inst->getType(); if (opcode == OP::OpCode::RawBufferLoad && Ty->isVectorTy() && @@ -4305,18 +4304,22 @@ Value *TranslateBufLoad(ResLoadHelper &helper, HLResource::Kind RK, Function *F = OP->GetOpFunc(opcode, EltTy); Value *Ld = Builder.CreateCall(F, Args, OP::GetOpCodeName(opcode)); + unsigned StatusIndex; // Extract elements from returned ResRet. // Native vector loads just have one vector element in the ResRet. // Others have up to four scalars that need to be individually extracted. - if (opcode == OP::OpCode::RawBufferVectorLoad) + if (opcode == OP::OpCode::RawBufferVectorLoad) { Elts[i++] = Builder.CreateExtractValue(Ld, 0); - else + StatusIndex = DXIL::kVecResRetStatusIndex; + } else { for (unsigned j = 0; j < chunkSize; j++, i++) Elts[i] = Builder.CreateExtractValue(Ld, j); + StatusIndex = DXIL::kResRetStatusIndex; + } // Update status. - UpdateStatus(Ld, helper.status, Builder, OP); + UpdateStatus(Ld, helper.status, Builder, OP, StatusIndex); if (!FirstLd) FirstLd = Ld; @@ -5718,58 +5721,51 @@ Value *TranslateCallShader(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return Builder.CreateCall(F, {opArg, ShaderIndex, Parameter}); } -Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, - HLOperationLowerHelper &helper, +static void TransferRayDescArgs(Value **Args, hlsl::OP *OP, + IRBuilder<> &Builder, CallInst *CI, + unsigned &Index, unsigned &HLIndex) { + // Extract elements from flattened ray desc arguments in HL op. 
+ // float3 Origin; + Value *origin = CI->getArgOperand(HLIndex++); + Args[Index++] = Builder.CreateExtractElement(origin, (uint64_t)0); + Args[Index++] = Builder.CreateExtractElement(origin, 1); + Args[Index++] = Builder.CreateExtractElement(origin, 2); + // float TMin; + Args[Index++] = CI->getArgOperand(HLIndex++); + // float3 Direction; + Value *direction = CI->getArgOperand(HLIndex++); + Args[Index++] = Builder.CreateExtractElement(direction, (uint64_t)0); + Args[Index++] = Builder.CreateExtractElement(direction, 1); + Args[Index++] = Builder.CreateExtractElement(direction, 2); + // float TMax; + Args[Index++] = CI->getArgOperand(HLIndex++); +} + +Value *TranslateTraceRay(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - hlsl::OP *hlslOP = &helper.hlslOP; - - Value *rayDesc = CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx); - Value *payLoad = CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx); - - Value *opArg = hlslOP->GetU32Const(static_cast(opcode)); + hlsl::OP *OP = &Helper.hlslOP; Value *Args[DXIL::OperandIndex::kTraceRayNumOp]; - Args[0] = opArg; - for (unsigned i = 1; i < HLOperandIndex::kTraceRayRayDescOpIdx; i++) { - Args[i] = CI->getArgOperand(i); - } - IRBuilder<> Builder(CI); - // struct RayDesc - //{ - // float3 Origin; - // float TMin; - // float3 Direction; - // float TMax; - //}; - Value *zeroIdx = hlslOP->GetU32Const(0); - Value *origin = Builder.CreateGEP(rayDesc, {zeroIdx, zeroIdx}); - origin = Builder.CreateLoad(origin); - unsigned index = DXIL::OperandIndex::kTraceRayRayDescOpIdx; - Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0); - Args[index++] = Builder.CreateExtractElement(origin, 1); - Args[index++] = Builder.CreateExtractElement(origin, 2); + Args[0] = OP->GetU32Const(static_cast(OpCode)); + unsigned Index = 1, HLIndex = 1; + while (HLIndex < HLOperandIndex::kTraceRayRayDescOpIdx) + Args[Index++] = CI->getArgOperand(HLIndex++); - Value *tmin = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(1)}); - tmin = Builder.CreateLoad(tmin); - Args[index++] = tmin; - - Value *direction = - Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(2)}); - direction = Builder.CreateLoad(direction); + IRBuilder<> Builder(CI); + TransferRayDescArgs(Args, OP, Builder, CI, Index, HLIndex); + DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands() - 1); + DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayPayloadOpIdx); - Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0); - Args[index++] = Builder.CreateExtractElement(direction, 1); - Args[index++] = Builder.CreateExtractElement(direction, 2); + Value *Payload = CI->getArgOperand(HLIndex++); + Args[Index++] = Payload; - Value *tmax = Builder.CreateGEP(rayDesc, {zeroIdx, hlslOP->GetU32Const(3)}); - tmax = Builder.CreateLoad(tmax); - Args[index++] = tmax; + DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands()); + DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayNumOp); - Args[DXIL::OperandIndex::kTraceRayPayloadOpIdx] = payLoad; - - Type *Ty = payLoad->getType(); - Function *F = hlslOP->GetOpFunc(opcode, Ty); + Type *Ty = Payload->getType(); + Function *F = OP->GetOpFunc(OpCode, Ty); return Builder.CreateCall(F, Args); } @@ -5812,33 +5808,16 @@ Value *TranslateTraceRayInline(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, Value *Args[DXIL::OperandIndex::kTraceRayInlineNumOp]; Args[0] = opArg; - for (unsigned i = 1; i < HLOperandIndex::kTraceRayInlineRayDescOpIdx; i++) { - 
Args[i] = CI->getArgOperand(i); - } + unsigned Index = 1, HLIndex = 1; + while (HLIndex < HLOperandIndex::kTraceRayInlineRayDescOpIdx) + Args[Index++] = CI->getArgOperand(HLIndex++); IRBuilder<> Builder(CI); - unsigned hlIndex = HLOperandIndex::kTraceRayInlineRayDescOpIdx; - unsigned index = DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx; - - // struct RayDesc - //{ - // float3 Origin; - Value *origin = CI->getArgOperand(hlIndex++); - Args[index++] = Builder.CreateExtractElement(origin, (uint64_t)0); - Args[index++] = Builder.CreateExtractElement(origin, 1); - Args[index++] = Builder.CreateExtractElement(origin, 2); - // float TMin; - Args[index++] = CI->getArgOperand(hlIndex++); - // float3 Direction; - Value *direction = CI->getArgOperand(hlIndex++); - Args[index++] = Builder.CreateExtractElement(direction, (uint64_t)0); - Args[index++] = Builder.CreateExtractElement(direction, 1); - Args[index++] = Builder.CreateExtractElement(direction, 2); - // float TMax; - Args[index++] = CI->getArgOperand(hlIndex++); - //}; - - DXASSERT_NOMSG(index == DXIL::OperandIndex::kTraceRayInlineNumOp); + DXASSERT_NOMSG(HLIndex == HLOperandIndex::kTraceRayInlineRayDescOpIdx); + DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayInlineRayDescOpIdx); + TransferRayDescArgs(Args, hlslOP, Builder, CI, Index, HLIndex); + DXASSERT_NOMSG(HLIndex == CI->getNumArgOperands()); + DXASSERT_NOMSG(Index == DXIL::OperandIndex::kTraceRayInlineNumOp); Function *F = hlslOP->GetOpFunc(opcode, Builder.getVoidTy()); @@ -5953,19 +5932,31 @@ Value *TranslateNoArgVectorOperation(CallInst *CI, IntrinsicOp IOP, return retVal; } +template +static void GetMatrixIndices(Constant *&Rows, Constant *&Cols, bool Is3x4, + LLVMContext &Ctx) { + if (Is3x4) { + uint32_t RVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}; + Rows = ConstantDataVector::get(Ctx, RVals); + ColElemTy CVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}; + Cols = ConstantDataVector::get(Ctx, CVals); + return; + } + uint32_t RVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; + Rows = ConstantDataVector::get(Ctx, RVals); + ColElemTy CVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; + Cols = ConstantDataVector::get(Ctx, CVals); +} + Value *TranslateNoArgMatrix3x4Operation( CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, HLOperationLowerHelper &helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; VectorType *Ty = cast(CI->getType()); - uint32_t rVals[] = {0, 0, 0, 0, 1, 1, 1, 1, 2, 2, 2, 2}; - Constant *rows = ConstantDataVector::get(CI->getContext(), rVals); - uint8_t cVals[] = {0, 1, 2, 3, 0, 1, 2, 3, 0, 1, 2, 3}; - Constant *cols = ConstantDataVector::get(CI->getContext(), cVals); - Value *retVal = - TrivialDxilOperation(opcode, {nullptr, rows, cols}, Ty, CI, hlslOP); - return retVal; + Constant *Rows, *Cols; + GetMatrixIndices(Rows, Cols, true, CI->getContext()); + return TrivialDxilOperation(opcode, {nullptr, Rows, Cols}, Ty, CI, hlslOP); } Value *TranslateNoArgTransposedMatrix3x4Operation( @@ -5974,13 +5965,9 @@ Value *TranslateNoArgTransposedMatrix3x4Operation( bool &Translated) { hlsl::OP *hlslOP = &helper.hlslOP; VectorType *Ty = cast(CI->getType()); - uint32_t rVals[] = {0, 1, 2, 0, 1, 2, 0, 1, 2, 0, 1, 2}; - Constant *rows = ConstantDataVector::get(CI->getContext(), rVals); - uint8_t cVals[] = {0, 0, 0, 1, 1, 1, 2, 2, 2, 3, 3, 3}; - Constant *cols = ConstantDataVector::get(CI->getContext(), cVals); - Value *retVal = - TrivialDxilOperation(opcode, {nullptr, rows, cols}, Ty, CI, hlslOP); - return retVal; + Constant *Rows, 
*Cols; + GetMatrixIndices(Rows, Cols, false, CI->getContext()); + return TrivialDxilOperation(opcode, {nullptr, Rows, Cols}, Ty, CI, hlslOP); } /* @@ -6184,55 +6171,49 @@ Value *TranslateUnpack(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, // Shader Execution Reordering. namespace { -Value *TranslateHitObjectMake(CallInst *CI, IntrinsicOp IOP, OP::OpCode Opcode, - HLOperationLowerHelper &Helper, - HLObjectOperationLowerHelper *ObjHelper, - bool &Translated) { +Value *TranslateHitObjectMakeNop(CallInst *CI, IntrinsicOp IOP, + OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { hlsl::OP *HlslOP = &Helper.hlslOP; IRBuilder<> Builder(CI); - unsigned SrcIdx = 1; - Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); - if (Opcode == OP::OpCode::HitObject_MakeNop) { - Value *HitObject = TrivialDxilOperation( - Opcode, {nullptr}, Type::getVoidTy(CI->getContext()), CI, HlslOP); - Builder.CreateStore(HitObject, HitObjectPtr); - DXASSERT( - CI->use_empty(), - "Default ctor return type is a Clang artifact. Value must not be used"); - return nullptr; - } + Value *HitObjectPtr = CI->getArgOperand(1); + Value *HitObject = TrivialDxilOperation( + Opcode, {nullptr}, Type::getVoidTy(CI->getContext()), CI, HlslOP); + Builder.CreateStore(HitObject, HitObjectPtr); + DXASSERT( + CI->use_empty(), + "Default ctor return type is a Clang artifact. Value must not be used"); + return nullptr; +} +Value *TranslateHitObjectMakeMiss(CallInst *CI, IntrinsicOp IOP, + OP::OpCode Opcode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { DXASSERT_NOMSG(CI->getNumArgOperands() == HLOperandIndex::kHitObjectMakeMiss_NumOp); - Value *RayFlags = CI->getArgOperand(SrcIdx++); - Value *MissShaderIdx = CI->getArgOperand(SrcIdx++); - DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectMakeMissRayDescOpIdx); - Value *RayDescOrigin = CI->getArgOperand(SrcIdx++); - Value *RayDescOriginX = - Builder.CreateExtractElement(RayDescOrigin, (uint64_t)0); - Value *RayDescOriginY = - Builder.CreateExtractElement(RayDescOrigin, (uint64_t)1); - Value *RayDescOriginZ = - Builder.CreateExtractElement(RayDescOrigin, (uint64_t)2); - - Value *RayDescTMin = CI->getArgOperand(SrcIdx++); - Value *RayDescDirection = CI->getArgOperand(SrcIdx++); - Value *RayDescDirectionX = - Builder.CreateExtractElement(RayDescDirection, (uint64_t)0); - Value *RayDescDirectionY = - Builder.CreateExtractElement(RayDescDirection, (uint64_t)1); - Value *RayDescDirectionZ = - Builder.CreateExtractElement(RayDescDirection, (uint64_t)2); - - Value *RayDescTMax = CI->getArgOperand(SrcIdx++); + hlsl::OP *OP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + Value *Args[DXIL::OperandIndex::kHitObjectMakeMiss_NumOp]; + Args[0] = nullptr; // Filled in by TrivialDxilOperation + + unsigned DestIdx = 1, SrcIdx = 1; + Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); + Args[DestIdx++] = CI->getArgOperand(SrcIdx++); // RayFlags + Args[DestIdx++] = CI->getArgOperand(SrcIdx++); // MissShaderIdx + + DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx); + DXASSERT_NOMSG(DestIdx == + DXIL::OperandIndex::kHitObjectMakeMiss_RayDescOpIdx); + TransferRayDescArgs(Args, OP, Builder, CI, DestIdx, SrcIdx); DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + DXASSERT_NOMSG(DestIdx == DXIL::OperandIndex::kHitObjectMakeMiss_NumOp); - Value *OutHitObject = TrivialDxilOperation( - Opcode, - {nullptr, RayFlags, MissShaderIdx, RayDescOriginX, RayDescOriginY, - RayDescOriginZ, 
RayDescTMin, RayDescDirectionX, RayDescDirectionY, - RayDescDirectionZ, RayDescTMax}, - Helper.voidTy, CI, HlslOP); + Value *OutHitObject = + TrivialDxilOperation(Opcode, Args, Helper.voidTy, CI, OP); Builder.CreateStore(OutHitObject, HitObjectPtr); return nullptr; } @@ -6299,7 +6280,32 @@ Value *TranslateHitObjectFromRayQuery(CallInst *CI, IntrinsicOp IOP, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + unsigned SrcIdx = 1; + Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); + Value *RayQuery = CI->getArgOperand(SrcIdx++); + + if (CI->getNumArgOperands() == + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp) { + Value *HitKind = CI->getArgOperand(SrcIdx++); + Value *AttribSrc = CI->getArgOperand(SrcIdx++); + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + OpCode = DXIL::OpCode::HitObject_FromRayQueryWithAttrs; + Type *AttrTy = AttribSrc->getType(); + Value *OutHitObject = TrivialDxilOperation( + OpCode, {nullptr, RayQuery, HitKind, AttribSrc}, AttrTy, CI, OP); + Builder.CreateStore(OutHitObject, HitObjectPtr); + return nullptr; + } + + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + OpCode = DXIL::OpCode::HitObject_FromRayQuery; + Value *OutHitObject = + TrivialDxilOperation(OpCode, {nullptr, RayQuery}, Helper.voidTy, CI, OP); + Builder.CreateStore(OutHitObject, HitObjectPtr); + return nullptr; } Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP, @@ -6307,7 +6313,42 @@ Value *TranslateHitObjectTraceRay(CallInst *CI, IntrinsicOp IOP, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + DXASSERT_NOMSG(CI->getNumArgOperands() == + HLOperandIndex::kHitObjectTraceRay_NumOp); + Value *Args[DXIL::OperandIndex::kHitObjectTraceRay_NumOp]; + Value *OpArg = OP->GetU32Const(static_cast(OpCode)); + Args[0] = OpArg; + + unsigned DestIdx = 1, SrcIdx = 1; + Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); + Args[DestIdx++] = CI->getArgOperand(SrcIdx++); + for (; SrcIdx < HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx; + ++SrcIdx, ++DestIdx) { + Args[DestIdx] = CI->getArgOperand(SrcIdx); + } + + DXASSERT_NOMSG(SrcIdx == HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx); + DXASSERT_NOMSG(DestIdx == + DXIL::OperandIndex::kHitObjectTraceRay_RayDescOpIdx); + TransferRayDescArgs(Args, OP, Builder, CI, DestIdx, SrcIdx); + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands() - 1); + DXASSERT_NOMSG(DestIdx == + DXIL::OperandIndex::kHitObjectTraceRay_PayloadOpIdx); + + Value *Payload = CI->getArgOperand(SrcIdx++); + Args[DestIdx++] = Payload; + + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + DXASSERT_NOMSG(DestIdx == DXIL::OperandIndex::kHitObjectTraceRay_NumOp); + + Function *F = OP->GetOpFunc(OpCode, Payload->getType()); + + Value *OutHitObject = Builder.CreateCall(F, Args); + Builder.CreateStore(OutHitObject, HitObjectPtr); + return nullptr; } Value *TranslateHitObjectInvoke(CallInst *CI, IntrinsicOp IOP, @@ -6315,7 +6356,16 @@ Value *TranslateHitObjectInvoke(CallInst *CI, IntrinsicOp IOP, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return nullptr; // TODO: Merge SER DXIL patches + unsigned SrcIdx = 1; + Value *HitObjectPtr = CI->getArgOperand(SrcIdx++); + Value *Payload 
= CI->getArgOperand(SrcIdx++); + DXASSERT_NOMSG(SrcIdx == CI->getNumArgOperands()); + + IRBuilder<> Builder(CI); + Value *HitObject = Builder.CreateLoad(HitObjectPtr); + TrivialDxilOperation(OpCode, {nullptr, HitObject, Payload}, + Payload->getType(), CI, &Helper.hlslOP); + return nullptr; } Value *TranslateHitObjectGetAttributes(CallInst *CI, IntrinsicOp IOP, @@ -6323,7 +6373,16 @@ Value *TranslateHitObjectGetAttributes(CallInst *CI, IntrinsicOp IOP, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Value *HitObjectPtr = CI->getArgOperand(1); + Value *HitObject = Builder.CreateLoad(HitObjectPtr); + Value *AttrOutPtr = + CI->getArgOperand(HLOperandIndex::kHitObjectGetAttributes_AttributeOpIdx); + TrivialDxilOperation(OpCode, {nullptr, HitObject, AttrOutPtr}, + AttrOutPtr->getType(), CI, OP); + return nullptr; } Value *TranslateHitObjectScalarGetter(CallInst *CI, IntrinsicOp IOP, @@ -6331,7 +6390,12 @@ Value *TranslateHitObjectScalarGetter(CallInst *CI, IntrinsicOp IOP, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + Value *HitObjectPtr = CI->getArgOperand(1); + IRBuilder<> Builder(CI); + Value *HitObject = Builder.CreateLoad(HitObjectPtr); + return TrivialDxilOperation(OpCode, {nullptr, HitObject}, CI->getType(), CI, + OP); } Value *TranslateHitObjectVectorGetter(CallInst *CI, IntrinsicOp IOP, @@ -6339,7 +6403,24 @@ Value *TranslateHitObjectVectorGetter(CallInst *CI, IntrinsicOp IOP, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + Value *HitObjectPtr = CI->getArgOperand(1); + IRBuilder<> Builder(CI); + Value *HitObject = Builder.CreateLoad(HitObjectPtr); + VectorType *Ty = cast(CI->getType()); + uint32_t Vals[] = {0, 1, 2, 3}; + Constant *Src = ConstantDataVector::get(CI->getContext(), Vals); + return TrivialDxilOperation(OpCode, {nullptr, HitObject, Src}, Ty, CI, OP); +} + +static bool IsHitObject3x4Getter(IntrinsicOp IOP) { + switch (IOP) { + default: + return false; + case IntrinsicOp::MOP_DxHitObject_GetObjectToWorld3x4: + case IntrinsicOp::MOP_DxHitObject_GetWorldToObject3x4: + return true; + } } Value *TranslateHitObjectMatrixGetter(CallInst *CI, IntrinsicOp IOP, @@ -6347,21 +6428,51 @@ Value *TranslateHitObjectMatrixGetter(CallInst *CI, IntrinsicOp IOP, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + Value *HitObjectPtr = CI->getArgOperand(1); + IRBuilder<> Builder(CI); + Value *HitObject = Builder.CreateLoad(HitObjectPtr); + + // Create 3x4 matrix indices + bool Is3x4 = IsHitObject3x4Getter(IOP); + Constant *Rows, *Cols; + GetMatrixIndices(Rows, Cols, Is3x4, CI->getContext()); + + VectorType *Ty = cast(CI->getType()); + return TrivialDxilOperation(OpCode, {nullptr, HitObject, Rows, Cols}, Ty, CI, + OP); } Value *TranslateHitObjectLoadLocalRootTableConstant( CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: 
Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Value *HitObjectPtr = CI->getArgOperand(1); + Value *Offset = CI->getArgOperand(2); + + Value *HitObject = Builder.CreateLoad(HitObjectPtr); + return TrivialDxilOperation(OpCode, {nullptr, HitObject, Offset}, + Helper.voidTy, CI, OP); } Value *TranslateHitObjectSetShaderTableIndex( CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, HLOperationLowerHelper &Helper, HLObjectOperationLowerHelper *pObjHelper, bool &Translated) { - return UndefValue::get(CI->getType()); // TODO: Merge SER DXIL patches + hlsl::OP *OP = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Value *HitObjectPtr = CI->getArgOperand(1); + Value *ShaderTableIndex = CI->getArgOperand(2); + + Value *InHitObject = Builder.CreateLoad(HitObjectPtr); + Value *OutHitObject = TrivialDxilOperation( + OpCode, {nullptr, InHitObject, ShaderTableIndex}, Helper.voidTy, CI, OP); + Builder.CreateStore(OutHitObject, HitObjectPtr); + return nullptr; } } // namespace @@ -6417,6 +6528,200 @@ Value *TranslateSelect(CallInst *CI, IntrinsicOp IOP, OP::OpCode opcode, return Builder.CreateSelect(cond, t, f); } + +Value *TranslateMatVecMul(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Constant *OpArg = HlslOp->GetU32Const(static_cast(OpCode)); + + // Input parameters + Value *InputVector = + CI->getArgOperand(HLOperandIndex::kMatVecMulInputVectorIdx); + Value *InputIsUnsigned = + CI->getArgOperand(HLOperandIndex::kMatVecMulIsInputUnsignedIdx); + Value *InputInterpretation = + CI->getArgOperand(HLOperandIndex::kMatVecMulInputInterpretationIdx); + + // Matrix parameters + Value *MatrixBuffer = + CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixBufferIdx); + Value *MatrixOffset = + CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixOffsetIdx); + Value *MatrixInterpretation = + CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixInterpretationIdx); + Value *MatrixM = CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixMIdx); + Value *MatrixK = CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixKIdx); + Value *MatrixLayout = + CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixLayoutIdx); + Value *MatrixTranspose = + CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixTransposeIdx); + Value *MatrixStride = + CI->getArgOperand(HLOperandIndex::kMatVecMulMatrixStrideIdx); + + // Output parameters + Value *OutputIsUnsigned = + CI->getArgOperand(HLOperandIndex::kMatVecMulIsOutputUnsignedIdx); + + // Get the DXIL function for the operation + Function *DxilFunc = HlslOp->GetOpFunc( + OpCode, {CI->getArgOperand(HLOperandIndex::kMatVecMulOutputVectorIdx) + ->getType() + ->getPointerElementType(), + InputVector->getType()}); + + // Create a call to the DXIL function + Value *NewCI = Builder.CreateCall( + DxilFunc, + {OpArg, InputVector, InputIsUnsigned, InputInterpretation, MatrixBuffer, + MatrixOffset, MatrixInterpretation, MatrixM, MatrixK, MatrixLayout, + MatrixTranspose, MatrixStride, OutputIsUnsigned}); + + // Get the output parameter and store the result + Value *OutParam = + CI->getArgOperand(HLOperandIndex::kMatVecMulOutputVectorIdx); + + Builder.CreateStore(NewCI, OutParam); + + return nullptr; +} + +Value *TranslateMatVecMulAdd(CallInst *CI, IntrinsicOp IOP, OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + + hlsl::OP *HlslOp = 
&Helper.hlslOP; + IRBuilder<> Builder(CI); + + Constant *OpArg = HlslOp->GetU32Const(static_cast(OpCode)); + + // Input vector parameters + Value *InputVector = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddInputVectorIdx); + Value *InputIsUnsigned = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddIsInputUnsignedIdx); + Value *InputInterpretation = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddInputInterpretationIdx); + + // Matrix parameters + Value *MatrixBuffer = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixBufferIdx); + Value *MatrixOffset = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixOffsetIdx); + Value *MatrixInterpretation = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixInterpretationIdx); + Value *MatrixM = CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixMIdx); + Value *MatrixK = CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixKIdx); + Value *MatrixLayout = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixLayoutIdx); + Value *MatrixTranspose = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixTransposeIdx); + Value *MatrixStride = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddMatrixStrideIdx); + + // Bias parameters + Value *BiasBuffer = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddBiasBufferIdx); + Value *BiasOffset = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddBiasOffsetIdx); + Value *BiasInterpretation = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddBiasInterpretationIdx); + + // Output parameters + Value *OutputIsUnsigned = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddIsOutputUnsignedIdx); + + // Get the DXIL function for the operation + Function *DxilFunc = HlslOp->GetOpFunc( + OpCode, {CI->getArgOperand(HLOperandIndex::kMatVecMulAddOutputVectorIdx) + ->getType() + ->getPointerElementType(), + InputVector->getType()}); + + // Create a call to the DXIL function + Value *NewCI = Builder.CreateCall( + DxilFunc, {OpArg, InputVector, InputIsUnsigned, InputInterpretation, + MatrixBuffer, MatrixOffset, MatrixInterpretation, MatrixM, + MatrixK, MatrixLayout, MatrixTranspose, MatrixStride, + BiasBuffer, BiasOffset, BiasInterpretation, OutputIsUnsigned}); + + // Store the result in the output parameter + Value *OutParam = + CI->getArgOperand(HLOperandIndex::kMatVecMulAddOutputVectorIdx); + Builder.CreateStore(NewCI, OutParam); + + return nullptr; +} + +Value *TranslateOuterProductAccumulate(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Constant *OpArg = HlslOp->GetU32Const(static_cast(OpCode)); + + // Input vector parameters + Value *InputVector1 = + CI->getArgOperand(HLOperandIndex::kOuterProdAccInputVec1Idx); + Value *InputVector2 = + CI->getArgOperand(HLOperandIndex::kOuterProdAccInputVec2Idx); + + // Matrix parameters + Value *MatrixBuffer = + CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixIdx); + Value *MatrixOffset = + CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixOffsetIdx); + Value *MatrixInterpretation = + CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixInterpretationIdx); + Value *MatrixLayout = + CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixLayoutIdx); + Value *MatrixStride = + CI->getArgOperand(HLOperandIndex::kOuterProdAccMatrixStrideIdx); + + // Get the DXIL function for the operation + Function *DxilFunc = HlslOp->GetOpFunc( + OpCode, {InputVector1->getType(), InputVector2->getType()}); + + return 
Builder.CreateCall( + DxilFunc, {OpArg, InputVector1, InputVector2, MatrixBuffer, MatrixOffset, + MatrixInterpretation, MatrixLayout, MatrixStride}); +} + +Value *TranslateVectorAccumulate(CallInst *CI, IntrinsicOp IOP, + OP::OpCode OpCode, + HLOperationLowerHelper &Helper, + HLObjectOperationLowerHelper *ObjHelper, + bool &Translated) { + + hlsl::OP *HlslOp = &Helper.hlslOP; + IRBuilder<> Builder(CI); + + Constant *OpArg = HlslOp->GetU32Const(static_cast(OpCode)); + + // Input vector parameter + Value *InputVector = CI->getArgOperand(HLOperandIndex::kVectorAccInputVecIdx); + + // Matrix parameters + Value *MatrixBuffer = CI->getArgOperand(HLOperandIndex::kVectorAccMatrixIdx); + Value *MatrixOffset = + CI->getArgOperand(HLOperandIndex::kVectorAccMatrixOffsetIdx); + + // Get the DXIL function for the operation + Function *DxilFunc = HlslOp->GetOpFunc(OpCode, InputVector->getType()); + + return Builder.CreateCall(DxilFunc, + {OpArg, InputVector, MatrixBuffer, MatrixOffset}); +} + } // namespace // Lower table. @@ -7063,7 +7368,7 @@ IntrinsicLower gLowerTable[] = { DXIL::OpCode::NumOpCodes}, {IntrinsicOp::MOP_InterlockedUMin, TranslateMopAtomicBinaryOperation, DXIL::OpCode::NumOpCodes}, - {IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMake, + {IntrinsicOp::MOP_DxHitObject_MakeNop, TranslateHitObjectMakeNop, DXIL::OpCode::HitObject_MakeNop}, {IntrinsicOp::IOP_DxMaybeReorderThread, TranslateMaybeReorderThread, DXIL::OpCode::MaybeReorderThread}, @@ -7123,13 +7428,22 @@ IntrinsicLower gLowerTable[] = { {IntrinsicOp::MOP_DxHitObject_LoadLocalRootTableConstant, TranslateHitObjectLoadLocalRootTableConstant, DXIL::OpCode::HitObject_LoadLocalRootTableConstant}, - {IntrinsicOp::MOP_DxHitObject_MakeMiss, TranslateHitObjectMake, + {IntrinsicOp::MOP_DxHitObject_MakeMiss, TranslateHitObjectMakeMiss, DXIL::OpCode::HitObject_MakeMiss}, {IntrinsicOp::MOP_DxHitObject_SetShaderTableIndex, TranslateHitObjectSetShaderTableIndex, DXIL::OpCode::HitObject_SetShaderTableIndex}, {IntrinsicOp::MOP_DxHitObject_TraceRay, TranslateHitObjectTraceRay, DXIL::OpCode::HitObject_TraceRay}, + + {IntrinsicOp::IOP___builtin_MatVecMul, TranslateMatVecMul, + DXIL::OpCode::MatVecMul}, + {IntrinsicOp::IOP___builtin_MatVecMulAdd, TranslateMatVecMulAdd, + DXIL::OpCode::MatVecMulAdd}, + {IntrinsicOp::IOP___builtin_OuterProductAccumulate, + TranslateOuterProductAccumulate, DXIL::OpCode::OuterProductAccumulate}, + {IntrinsicOp::IOP___builtin_VectorAccumulate, TranslateVectorAccumulate, + DXIL::OpCode::VectorAccumulate}, }; } // namespace static_assert( @@ -8220,7 +8534,7 @@ Value *TranslateStructBufMatLd(CallInst *CI, IRBuilder<> &Builder, Value *status, Value *bufIdx, Value *baseOffset, const DataLayout &DL) { - ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset); + ResLoadHelper helper(CI, RK, handle, bufIdx, baseOffset, status); #ifndef NDEBUG Value *ptr = CI->getArgOperand(HLOperandIndex::kMatLoadPtrOpIdx); Type *matType = ptr->getType()->getPointerElementType(); @@ -8547,7 +8861,7 @@ void TranslateStructBufSubscriptUser(Instruction *user, Value *handle, } } else if (LoadInst *LdInst = dyn_cast(user)) { // Load of scalar/vector within a struct or structured raw load. 
- ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset); + ResLoadHelper helper(LdInst, ResKind, handle, bufIdx, baseOffset, status); TranslateBufLoad(helper, ResKind, Builder, OP, DL); LdInst->eraseFromParent(); @@ -8922,7 +9236,8 @@ void TranslateHLSubscript(CallInst *CI, HLSubscriptOpcode opcode, IRBuilder<> Builder(CI); if (LoadInst *ldInst = dyn_cast(*U)) { Value *Offset = UndefValue::get(Builder.getInt32Ty()); - ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, mipLevel); + ResLoadHelper ldHelper(ldInst, RK, handle, coord, Offset, + /*status*/ nullptr, mipLevel); TranslateBufLoad(ldHelper, RK, Builder, hlslOP, helper.dataLayout); ldInst->eraseFromParent(); } else { diff --git a/lib/Support/APFloat.cpp b/lib/Support/APFloat.cpp index 3c76c72271..40c22459e2 100644 --- a/lib/Support/APFloat.cpp +++ b/lib/Support/APFloat.cpp @@ -331,7 +331,7 @@ trailingHexadecimalFraction(StringRef::iterator p, StringRef::iterator end, /* If we ran off the end it is exactly zero or one-half, otherwise a little more. */ - if (hexDigit == -1U) + if (hexDigit == ~0U) return digitValue == 0 ? lfExactlyZero: lfExactlyHalf; else return digitValue == 0 ? lfLessThanHalf: lfMoreThanHalf; @@ -446,7 +446,7 @@ ulpsFromBoundary(const integerPart *parts, unsigned int bits, bool isNearest) if (~parts[count]) return ~(integerPart) 0; /* A lot. */ - return -parts[0]; + return (~parts[0] + 1); } return ~(integerPart) 0; /* A lot. */ @@ -2368,7 +2368,7 @@ APFloat::convertFromHexadecimalString(StringRef s, roundingMode rounding_mode) } hex_value = hexDigitValue(*p); - if (hex_value == -1U) + if (hex_value == ~0U) break; p++; diff --git a/lib/Support/APInt.cpp b/lib/Support/APInt.cpp index 23f89bb66f..d01238a552 100644 --- a/lib/Support/APInt.cpp +++ b/lib/Support/APInt.cpp @@ -70,7 +70,7 @@ inline static unsigned getDigit(char cdigit, uint8_t radix) { if (r < radix) return r; - return -1U; + return std::numeric_limits::max(); } @@ -79,7 +79,7 @@ void APInt::initSlowCase(unsigned numBits, uint64_t val, bool isSigned) { pVal[0] = val; if (isSigned && int64_t(val) < 0) for (unsigned i = 1; i < getNumWords(); ++i) - pVal[i] = -1ULL; + pVal[i] = std::numeric_limits::max(); } void APInt::initSlowCase(const APInt& that) { @@ -735,7 +735,7 @@ unsigned APInt::countLeadingOnes() const { unsigned Count = llvm::countLeadingOnes(pVal[i] << shift); if (Count == highWordBits) { for (i--; i >= 0; --i) { - if (pVal[i] == -1ULL) + if (pVal[i] == std::numeric_limits::max()) Count += APINT_BITS_PER_WORD; else { Count += llvm::countLeadingOnes(pVal[i]); @@ -761,7 +761,8 @@ unsigned APInt::countTrailingZeros() const { unsigned APInt::countTrailingOnesSlowCase() const { unsigned Count = 0; unsigned i = 0; - for (; i < getNumWords() && pVal[i] == -1ULL; ++i) + for (; i < getNumWords() && pVal[i] == std::numeric_limits::max(); + ++i) Count += APINT_BITS_PER_WORD; if (i < getNumWords()) Count += llvm::countTrailingOnes(pVal[i]); @@ -1070,7 +1071,7 @@ APInt APInt::ashr(unsigned shiftAmt) const { // issues in the algorithm below. if (shiftAmt == BitWidth) { if (isNegative()) - return APInt(BitWidth, -1ULL, true); + return APInt(BitWidth, std::numeric_limits::max(), true); else return APInt(BitWidth, 0); } @@ -1123,7 +1124,8 @@ APInt APInt::ashr(unsigned shiftAmt) const { } // Remaining words are 0 or -1, just assign them. - uint64_t fillValue = (isNegative() ? -1ULL : 0); + uint64_t fillValue = + (isNegative() ? 
std::numeric_limits::max() : 0); for (unsigned i = breakWord+1; i < getNumWords(); ++i) val[i] = fillValue; APInt Result(val, BitWidth); @@ -2192,7 +2194,18 @@ void APInt::toString(SmallVectorImpl &Str, unsigned Radix, N = I; } else { Str.push_back('-'); - N = -(uint64_t)I; + // In this else block, all values of I must be less than 0. + // + // Because values are stored in 2's complement and I is a signed + // integer, the expression -I is equivalent to (~I + 1) for all values + // of I, except INT64_MIN, where -I is undefined behavior in C++ due to + // overflow. + // + // However, (~I + 1) is still well-defined even when I == INT64_MIN, and + // it evaluates to the same bit pattern as INT64_MIN. Because N is + // unsigned, assigning N = ~I + 1 preserves the exact bit pattern + // and correctly represents the 2's complement value of -I. + N = (~I + 1); } } @@ -2408,7 +2421,7 @@ APInt::tcLSB(const integerPart *parts, unsigned int n) } } - return -1U; + return std::numeric_limits::max(); } /* Returns the bit number of the most significant set bit of a number. @@ -2428,7 +2441,7 @@ APInt::tcMSB(const integerPart *parts, unsigned int n) } } while (n); - return -1U; + return std::numeric_limits::max(); } /* Copy the bit vector of width srcBITS from SRC, starting at bit diff --git a/lib/Support/DataExtractor.cpp b/lib/Support/DataExtractor.cpp index 5d6d60a87f..625fb3595a 100644 --- a/lib/Support/DataExtractor.cpp +++ b/lib/Support/DataExtractor.cpp @@ -168,7 +168,7 @@ int64_t DataExtractor::getSLEB128(uint32_t *offset_ptr) const { // Sign bit of byte is 2nd high order bit (0x40) if (shift < 64 && (byte & 0x40)) - result |= -(1ULL << shift); + result |= (~(1ULL << shift) + 1); *offset_ptr = offset; return result; diff --git a/lib/Support/StringRef.cpp b/lib/Support/StringRef.cpp index ddece087a9..52b949d826 100644 --- a/lib/Support/StringRef.cpp +++ b/lib/Support/StringRef.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/Hashing.h" #include "llvm/ADT/edit_distance.h" #include +#include using namespace llvm; @@ -393,13 +394,16 @@ bool llvm::getAsSignedInteger(StringRef Str, unsigned Radix, // Get the positive part of the value. if (getAsUnsignedInteger(Str.substr(1), Radix, ULLVal) || - // Reject values so large they'd overflow as negative signed, but allow - // "-0". This negates the unsigned so that the negative isn't undefined - // on signed overflow. - (long long)-ULLVal > 0) + // Reject values larger than what can be represented as negative signed. + // The most negative long long is LLONG_MIN, which has magnitude + // (LLONG_MAX + 1). Values larger than this magnitude cannot be negated + // without overflow. 
+ ULLVal > static_cast( + std::numeric_limits::max()) + + 1) return true; - Result = -ULLVal; + Result = (~ULLVal + 1); return false; } diff --git a/lib/Support/TimeValue.cpp b/lib/Support/TimeValue.cpp index 136b93ecee..06de27bbda 100644 --- a/lib/Support/TimeValue.cpp +++ b/lib/Support/TimeValue.cpp @@ -19,8 +19,7 @@ using namespace sys; const TimeValue::SecondsType TimeValue::PosixZeroTimeSeconds = -946684800; -const TimeValue::SecondsType - TimeValue::Win32ZeroTimeSeconds = -12591158400ULL; +const TimeValue::SecondsType TimeValue::Win32ZeroTimeSeconds = -12591158400LL; void TimeValue::normalize( void ) { diff --git a/lib/Support/raw_ostream.cpp b/lib/Support/raw_ostream.cpp index b11ffb15d5..595468a6dc 100644 --- a/lib/Support/raw_ostream.cpp +++ b/lib/Support/raw_ostream.cpp @@ -134,13 +134,18 @@ raw_ostream &raw_ostream::operator<<(unsigned long N) { } raw_ostream &raw_ostream::operator<<(long N) { + // A positive signed long has the same value when casted to its unsigned + // counterpart. If its negative, then we'll handle it in the below if block. + unsigned long UN = static_cast(N); + if (N < 0 && writeBase == 10) { *this << '-'; - // Avoid undefined behavior on LONG_MIN with a cast. - N = -(unsigned long)N; + // Since N is negative and we're storing the result in an unsigned Long, + // we can use the equivalence of -N == ~N + 1 to get the positive value. + UN = ~N + 1UL; } - return this->operator<<(static_cast(N)); + return this->operator<<(UN); } raw_ostream &raw_ostream::operator<<(unsigned long long N) { @@ -169,13 +174,18 @@ raw_ostream &raw_ostream::operator<<(unsigned long long N) { } raw_ostream &raw_ostream::operator<<(long long N) { + // A positive signed long has the same value when casted to its unsigned + // counterpart. If its negative, then we'll handle it in the below if block. + unsigned long long UN = static_cast(N); + if (N < 0 && writeBase == 10) { *this << '-'; - // Avoid undefined behavior on INT64_MIN with a cast. - N = -(unsigned long long)N; + // Since N is negative and we're storing the result in an unsigned Long, + // we can use the equivalence of -N == ~N + 1 to get the positive value. + UN = ~N + 1ULL; } - return this->operator<<(static_cast(N)); + return this->operator<<(UN); } // HLSL Change Starts - Generalize non-base10 printing. @@ -470,7 +480,10 @@ raw_ostream &raw_ostream::operator<<(const FormattedNumber &FN) { char *EndPtr = NumberBuffer+sizeof(NumberBuffer); char *CurPtr = EndPtr; bool Neg = (FN.DecValue < 0); - uint64_t N = Neg ? -static_cast(FN.DecValue) : FN.DecValue; + // If the value is negative, and because we are storing the result of the ~ + // operation in an unsigned value, we can use the equivalence of + // -N == ~N + 1 to get the positive value of the negative number + uint64_t N = Neg ? 
(~FN.DecValue + 1UL) : FN.DecValue; while (N) { *--CurPtr = '0' + char(N % 10); N /= 10; diff --git a/lib/Transforms/IPO/DeadArgumentElimination.cpp b/lib/Transforms/IPO/DeadArgumentElimination.cpp index d044764025..0cf9f7797a 100644 --- a/lib/Transforms/IPO/DeadArgumentElimination.cpp +++ b/lib/Transforms/IPO/DeadArgumentElimination.cpp @@ -146,7 +146,7 @@ namespace { private: Liveness MarkIfNotLive(RetOrArg Use, UseVector &MaybeLiveUses); Liveness SurveyUse(const Use *U, UseVector &MaybeLiveUses, - unsigned RetValNum = -1U); + unsigned RetValNum = ~0U); Liveness SurveyUses(const Value *V, UseVector &MaybeLiveUses); void SurveyFunction(const Function &F); @@ -442,7 +442,7 @@ DAE::Liveness DAE::SurveyUse(const Use *U, // that U is really a use of an insertvalue instruction that uses the // original Use. const Function *F = RI->getParent()->getParent(); - if (RetValNum != -1U) { + if (RetValNum != ~0U) { RetOrArg Use = CreateRet(F, RetValNum); // We might be live, depending on the liveness of Use. return MarkIfNotLive(Use, MaybeLiveUses); diff --git a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp index 2d28b14213..66e01198bd 100644 --- a/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp +++ b/lib/Transforms/InstCombine/InstCombineSimplifyDemanded.cpp @@ -998,7 +998,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, for (unsigned i = 0; i < VWidth; i++) { if (DemandedElts[i]) { unsigned MaskVal = Shuffle->getMaskValue(i); - if (MaskVal != -1u) { + if (MaskVal != ~0u) { assert(MaskVal < LHSVWidth * 2 && "shufflevector mask index out of range!"); if (MaskVal < LHSVWidth) @@ -1022,7 +1022,7 @@ Value *InstCombiner::SimplifyDemandedVectorElts(Value *V, APInt DemandedElts, bool NewUndefElts = false; for (unsigned i = 0; i < VWidth; i++) { unsigned MaskVal = Shuffle->getMaskValue(i); - if (MaskVal == -1u) { + if (MaskVal == ~0u) { UndefElts.setBit(i); } else if (!DemandedElts[i]) { NewUndefElts = true; diff --git a/lib/Transforms/InstCombine/InstructionCombining.cpp b/lib/Transforms/InstCombine/InstructionCombining.cpp index 6bc322fa92..c93232b67f 100644 --- a/lib/Transforms/InstCombine/InstructionCombining.cpp +++ b/lib/Transforms/InstCombine/InstructionCombining.cpp @@ -1937,7 +1937,8 @@ Instruction *InstCombiner::visitAllocSite(Instruction &MI) { } else if (IntrinsicInst *II = dyn_cast(I)) { if (II->getIntrinsicID() == Intrinsic::objectsize) { ConstantInt *CI = cast(II->getArgOperand(1)); - uint64_t DontKnow = CI->isZero() ? -1ULL : 0; + uint64_t DontKnow = + CI->isZero() ? std::numeric_limits::max() : 0; ReplaceInstUsesWith(*I, ConstantInt::get(I->getType(), DontKnow)); } } diff --git a/lib/Transforms/Scalar/LoadCombine.cpp b/lib/Transforms/Scalar/LoadCombine.cpp index 6d358744ef..fb48513c18 100644 --- a/lib/Transforms/Scalar/LoadCombine.cpp +++ b/lib/Transforms/Scalar/LoadCombine.cpp @@ -131,10 +131,10 @@ bool LoadCombine::aggregateLoads(SmallVectorImpl &Loads) { LoadInst *BaseLoad = nullptr; SmallVector AggregateLoads; bool Combined = false; - uint64_t PrevOffset = -1ull; + uint64_t PrevOffset = std::numeric_limits::max(); uint64_t PrevSize = 0; for (auto &L : Loads) { - if (PrevOffset == -1ull) { + if (PrevOffset == std::numeric_limits::max()) { BaseLoad = L.Load; PrevOffset = L.POP.Offset; PrevSize = L.Load->getModule()->getDataLayout().getTypeStoreSize( @@ -186,7 +186,7 @@ bool LoadCombine::combineLoads(SmallVectorImpl &Loads) { // Find first load. 
This is where we put the new load. LoadPOPPair FirstLP; - FirstLP.InsertOrder = -1u; + FirstLP.InsertOrder = std::numeric_limits::max(); for (const auto &L : Loads) if (L.InsertOrder < FirstLP.InsertOrder) FirstLP = L; diff --git a/lib/Transforms/Scalar/LoopStrengthReduce.cpp b/lib/Transforms/Scalar/LoopStrengthReduce.cpp index 3ab9367a6b..60962ec69a 100644 --- a/lib/Transforms/Scalar/LoopStrengthReduce.cpp +++ b/lib/Transforms/Scalar/LoopStrengthReduce.cpp @@ -1395,8 +1395,11 @@ static bool isAMCompletelyFolded(const TargetTransformInfo &TTI, // ICmpZero -1*ScaleReg + BaseOffset => ICmp ScaleReg, BaseOffset // Offs is the ICmp immediate. if (Scale == 0) - // The cast does the right thing with INT64_MIN. - BaseOffset = -(uint64_t)BaseOffset; + // Negate BaseOffset using two's complement (~x + 1) to avoid undefined + // behavior. Simple negation (-BaseOffset) would be undefined for + // INT64_MIN since -INT64_MIN cannot fit in int64_t. Two's complement + // gives the expected wraparound behavior: -INT64_MIN becomes INT64_MIN. + BaseOffset = ~BaseOffset + 1ULL; return TTI.isLegalICmpImmediate(BaseOffset); } @@ -3000,7 +3003,7 @@ void LSRInstance::CollectFixupsAndInitialFormulae() { // of -1) are now also interesting. for (size_t i = 0, e = Factors.size(); i != e; ++i) if (Factors[i] != -1) - Factors.insert(-(uint64_t)Factors[i]); + Factors.insert(~Factors[i] + 1ULL); Factors.insert(-1); } @@ -3739,7 +3742,7 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { const SCEV *OrigReg = WI.OrigReg; Type *IntTy = SE.getEffectiveSCEVType(OrigReg->getType()); - const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, -(uint64_t)Imm)); + const SCEV *NegImmS = SE.getSCEV(ConstantInt::get(IntTy, ~Imm + 1ULL)); unsigned BitWidth = SE.getTypeSizeInBits(IntTy); // TODO: Use a more targeted data structure. @@ -3754,8 +3757,8 @@ void LSRInstance::GenerateCrossUseConstantOffsets() { if (F.ScaledReg == OrigReg) { int64_t Offset = (uint64_t)F.BaseOffset + Imm * (uint64_t)F.Scale; // Don't create 50 + reg(-50). - if (F.referencesReg(SE.getSCEV( - ConstantInt::get(IntTy, -(uint64_t)Offset)))) + if (F.referencesReg( + SE.getSCEV(ConstantInt::get(IntTy, ~Offset + 1ULL)))) continue; Formula NewF = F; NewF.BaseOffset = Offset; @@ -4556,7 +4559,7 @@ Value *LSRInstance::Expand(const LSRFixup &LF, const Formula &F, // The other interesting way of "folding" with an ICmpZero is to use a // negated immediate. 
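As an aside on the two's-complement idiom used in the negation changes above: applied to an unsigned value, ~x + 1 is always well defined and yields the magnitude of the original signed value, including for INT64_MIN, where plain signed negation would overflow. A minimal standalone sketch, not part of this patch (magnitudeOf is a hypothetical helper name):

#include <cstdint>
#include <cstdio>
#include <limits>

// Returns the magnitude of i as an unsigned value without ever negating a
// signed integer, so the INT64_MIN case stays well defined.
static uint64_t magnitudeOf(int64_t i) {
  uint64_t u = static_cast<uint64_t>(i); // reinterpret the bits as unsigned
  return i < 0 ? ~u + 1 : u;             // two's-complement negation
}

int main() {
  std::printf("%llu\n", (unsigned long long)magnitudeOf(-42)); // 42
  std::printf("%llu\n", (unsigned long long)magnitudeOf(42));  // 42
  // INT64_MIN's magnitude does not fit in int64_t, but the unsigned
  // arithmetic above still produces it: 9223372036854775808.
  std::printf("%llu\n", (unsigned long long)magnitudeOf(
                            std::numeric_limits<int64_t>::min()));
  return 0;
}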
if (!ICmpScaledV) - ICmpScaledV = ConstantInt::get(IntTy, -(uint64_t)Offset); + ICmpScaledV = ConstantInt::get(IntTy, ~Offset + 1ULL); else { Ops.push_back(SE.getUnknown(ICmpScaledV)); ICmpScaledV = ConstantInt::get(IntTy, Offset); @@ -4608,8 +4611,8 @@ Value *LSRInstance::Expand(const LSRFixup &LF, const Formula &F, assert((F.Scale == 0 || F.Scale == 1) && "ICmp does not support folding a global value and " "a scale at the same time!"); - Constant *C = ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), - -(uint64_t)Offset); + Constant *C = + ConstantInt::getSigned(SE.getEffectiveSCEVType(OpTy), ~Offset + 1ULL); if (C->getType() != OpTy) C = ConstantExpr::getCast(CastInst::getCastOpcode(C, false, OpTy, false), diff --git a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp index e487079b94..54250ad36d 100644 --- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp +++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp @@ -129,7 +129,6 @@ class SROA_Helper { void RewriteMemIntrin(MemIntrinsic *MI, Value *OldV); void RewriteCall(CallInst *CI); void RewriteBitCast(BitCastInst *BCI); - void RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, bool bOut); }; } // namespace @@ -1478,6 +1477,57 @@ void isSafePHISelectUseForScalarRepl(Instruction *I, uint64_t Offset, } } +// Returns whether the `OpIdx` argument of HL intrinsic call `CI` is expected to +// be a user-defined-type. +static bool isUDTIntrinsicArg(CallInst *CI, unsigned OpIdx) { + if (HLOpcodeGroup::HLIntrinsic != GetHLOpcodeGroup(CI->getCalledFunction())) + return false; + const unsigned NumOps = CI->getNumArgOperands(); + switch (static_cast(GetHLOpcode(CI))) { + case IntrinsicOp::IOP_TraceRay: + if (NumOps == HLOperandIndex::kTraceRay_PreNumOp && + OpIdx == HLOperandIndex::kTraceRayPayloadPreOpIdx) + return true; + else if (NumOps == HLOperandIndex::kTraceRay_NumOp && + OpIdx == HLOperandIndex::kTraceRayPayloadOpIdx) + return true; + break; + case IntrinsicOp::IOP_ReportHit: + if (OpIdx == HLOperandIndex::kReportIntersectionAttributeOpIdx) + return true; + break; + case IntrinsicOp::IOP_CallShader: + if (OpIdx == HLOperandIndex::kCallShaderPayloadOpIdx) + return true; + break; + case IntrinsicOp::MOP_DxHitObject_FromRayQuery: + if (NumOps == HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp && + OpIdx == + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx) + return true; + break; + case IntrinsicOp::MOP_DxHitObject_TraceRay: + if (NumOps == HLOperandIndex::kHitObjectTraceRay_PreNumOp && + OpIdx == HLOperandIndex::kHitObjectTraceRay_PayloadPreOpIdx) + return true; + else if (NumOps == HLOperandIndex::kHitObjectTraceRay_NumOp && + OpIdx == HLOperandIndex::kHitObjectTraceRay_PayloadOpIdx) + return true; + break; + case IntrinsicOp::MOP_DxHitObject_Invoke: + if (OpIdx == HLOperandIndex::kHitObjectInvoke_PayloadOpIdx) + return true; + break; + case IntrinsicOp::MOP_DxHitObject_GetAttributes: + if (OpIdx == HLOperandIndex::kHitObjectGetAttributes_AttributeOpIdx) + return true; + break; + default: + break; + } + return false; +} + /// isSafeForScalarRepl - Check if instruction I is a safe use with regard to /// performing scalar replacement of alloca AI. The results are flagged in /// the Info parameter. Offset indicates the position within AI that is @@ -1535,16 +1585,9 @@ void isSafeForScalarRepl(Instruction *I, uint64_t Offset, AllocaInfo &Info) { // Most HL functions are safe for scalar repl. 
if (HLOpcodeGroup::NotHL == group) return MarkUnsafe(Info, User); - else if (HLOpcodeGroup::HLIntrinsic == group) { - // TODO: should we check HL parameter type for UDT overload instead of - // basing on IOP? - IntrinsicOp opcode = static_cast(GetHLOpcode(CI)); - if (IntrinsicOp::IOP_TraceRay == opcode || - IntrinsicOp::IOP_ReportHit == opcode || - IntrinsicOp::IOP_CallShader == opcode) { - return MarkUnsafe(Info, User); - } - } + else if (HLOpcodeGroup::HLIntrinsic == group && + isUDTIntrinsicArg(CI, U.getOperandNo())) + return MarkUnsafe(Info, User); } else { return MarkUnsafe(Info, User); } @@ -2660,12 +2703,11 @@ void SROA_Helper::RewriteBitCast(BitCastInst *BCI) { RewriteForGEP(cast(GEP), GEPBuilder); } -/// RewriteCallArg - For Functions which don't flat, -/// replace OldVal with alloca and -/// copy in copy out data between alloca and flattened NewElts -/// in CallInst. -void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, - bool bOut) { +/// memcpyAggCallArg - For an aggregate call argument, this replaces the +/// argument with an alloca and inserts a memcpy for input (if CopyIn) and +/// output (if CopyOut). +static void memcpyAggCallArg(CallInst *CI, unsigned ArgIdx, bool CopyIn, + bool CopyOut) { Function *F = CI->getParent()->getParent(); IRBuilder<> AllocaBuilder(dxilutil::FindAllocaInsertionPt(F)); const DataLayout &DL = F->getParent()->getDataLayout(); @@ -2675,17 +2717,79 @@ void SROA_Helper::RewriteCallArg(CallInst *CI, unsigned ArgIdx, bool bIn, Type *userTyElt = userTy->getElementType(); Value *Alloca = AllocaBuilder.CreateAlloca(userTyElt); IRBuilder<> Builder(CI); - if (bIn) { - MemCpyInst *cpy = cast(Builder.CreateMemCpy( - Alloca, userTyV, DL.getTypeAllocSize(userTyElt), false)); - RewriteMemIntrin(cpy, cpy->getRawSource()); - } + if (CopyIn) + Builder.CreateMemCpy(Alloca, userTyV, DL.getTypeAllocSize(userTyElt), + false); CI->setArgOperand(ArgIdx, Alloca); - if (bOut) { + if (CopyOut) { Builder.SetInsertPoint(CI->getNextNode()); - MemCpyInst *cpy = cast(Builder.CreateMemCpy( - userTyV, Alloca, DL.getTypeAllocSize(userTyElt), false)); - RewriteMemIntrin(cpy, cpy->getRawSource()); + Builder.CreateMemCpy(userTyV, Alloca, DL.getTypeAllocSize(userTyElt), + false); + } +} + +static void copyIntrinsicAggArgs(HLModule &HLM) { + // Iterate HLIntrinsic function users + // For specific intrinsics, use memcpyAggCallArg on aggregate args + // This ensures that the call does not directly use the pointer supplied, + // allowing certain arguments to be flattened, and UDT args to be correctly + // lowered. 
+ for (Function &F : HLM.GetModule()->functions()) { + if (F.isIntrinsic() || !F.isDeclaration()) + continue; + if (GetHLOpcodeGroup(&F) != HLOpcodeGroup::HLIntrinsic) + continue; + // Iterate users + for (User *U : F.users()) { + if (CallInst *CI = dyn_cast(U)) { + switch (static_cast(GetHLOpcode(CI))) { + case IntrinsicOp::IOP_TraceRay: + memcpyAggCallArg(CI, HLOperandIndex::kTraceRayRayDescOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + memcpyAggCallArg(CI, HLOperandIndex::kTraceRayPayloadPreOpIdx, + /*CopyIn*/ true, /*CopyOut*/ true); + break; + case IntrinsicOp::IOP_ReportHit: + memcpyAggCallArg(CI, + HLOperandIndex::kReportIntersectionAttributeOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + break; + case IntrinsicOp::IOP_CallShader: + memcpyAggCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx, + /*CopyIn*/ true, /*CopyOut*/ true); + break; + case IntrinsicOp::MOP_TraceRayInline: + memcpyAggCallArg(CI, HLOperandIndex::kTraceRayInlineRayDescOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + break; + case IntrinsicOp::MOP_DxHitObject_FromRayQuery: + if (CI->getNumArgOperands() == + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_NumOp) + memcpyAggCallArg( + CI, + HLOperandIndex::kHitObjectFromRayQuery_WithAttrs_AttributeOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + break; + case IntrinsicOp::MOP_DxHitObject_MakeMiss: + memcpyAggCallArg(CI, HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + break; + case IntrinsicOp::MOP_DxHitObject_TraceRay: + memcpyAggCallArg(CI, HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx, + /*CopyIn*/ true, /*CopyOut*/ false); + memcpyAggCallArg(CI, + HLOperandIndex::kHitObjectTraceRay_PayloadPreOpIdx, + /*CopyIn*/ true, /*CopyOut*/ true); + break; + case IntrinsicOp::MOP_DxHitObject_Invoke: + memcpyAggCallArg(CI, HLOperandIndex::kHitObjectInvoke_PayloadOpIdx, + /*CopyIn*/ true, /*CopyOut*/ true); + break; + default: + break; + } + } + } } } @@ -2739,13 +2843,26 @@ static CallInst *RewriteWithFlattenedHLIntrinsicCall(CallInst *CI, /// RewriteCall - Replace OldVal with flattened NewElts in CallInst. void SROA_Helper::RewriteCall(CallInst *CI) { - HLOpcodeGroup group = GetHLOpcodeGroupByName(CI->getCalledFunction()); - if (group != HLOpcodeGroup::NotHL) { + HLOpcodeGroup Group = GetHLOpcodeGroupByName(CI->getCalledFunction()); + if (Group != HLOpcodeGroup::NotHL) { unsigned opcode = GetHLOpcode(CI); - if (group == HLOpcodeGroup::HLIntrinsic) { + if (Group == HLOpcodeGroup::HLIntrinsic) { + // RayQuery this pointer replacement. + if (OldVal->getType()->isPointerTy() && + dxilutil::IsHLSLRayQueryType( + OldVal->getType()->getPointerElementType())) { + // For RayQuery methods, we want to replace the RayQuery this pointer + // with a load and use of the underlying handle value. + // This will allow elimination of RayQuery types earlier. + RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, + /*loadElts*/ true); + DeadInsts.push_back(CI); + return; + } + IntrinsicOp IOP = static_cast(opcode); switch (IOP) { - case IntrinsicOp::MOP_Append: { + case IntrinsicOp::MOP_Append: // Buffer Append already expand in code gen. // Must be OutputStream Append here. // Every Elt has a pointer type. 
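As an illustrative aside on the copy-in/copy-out expansion above: the effect of memcpyAggCallArg is that an intrinsic call never touches the caller's aggregate directly, which is what later allows the original variable to be flattened. A minimal standalone C++ sketch of the same semantics for an in/out payload argument (Payload, fakeIntrinsic, and callWithCopyInOut are hypothetical names used only for illustration):

#include <cstdio>
#include <cstring>

struct Payload { float color[4]; int hit; };

// Stand-in for an HLSL intrinsic that reads and writes its aggregate
// argument through a pointer.
static void fakeIntrinsic(Payload *p) { p->hit = 1; }

// Copy-in/copy-out wrapper: the callee only ever sees a temporary
// (the "alloca"), and the caller's storage is updated afterwards.
static void callWithCopyInOut(Payload *userPtr) {
  Payload tmp;
  std::memcpy(&tmp, userPtr, sizeof(Payload)); // copy-in
  fakeIntrinsic(&tmp);                         // call uses the temporary
  std::memcpy(userPtr, &tmp, sizeof(Payload)); // copy-out
}

int main() {
  Payload p = {};
  callWithCopyInOut(&p);
  std::printf("hit = %d\n", p.hit); // prints: hit = 1
  return 0;
}

For arguments that are input-only (for example the RayDesc operands handled above), only the copy-in memcpy is emitted.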
@@ -2753,63 +2870,47 @@ void SROA_Helper::RewriteCall(CallInst *CI) { RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ false); DeadInsts.push_back(CI); - } break; - case IntrinsicOp::IOP_TraceRay: { + return; + case IntrinsicOp::IOP_TraceRay: if (OldVal == CI->getArgOperand(HLOperandIndex::kTraceRayRayDescOpIdx)) { - RewriteCallArg(CI, HLOperandIndex::kTraceRayRayDescOpIdx, - /*bIn*/ true, /*bOut*/ false); - } else { - DXASSERT(OldVal == - CI->getArgOperand(HLOperandIndex::kTraceRayPayLoadOpIdx), - "else invalid TraceRay"); - RewriteCallArg(CI, HLOperandIndex::kTraceRayPayLoadOpIdx, - /*bIn*/ true, /*bOut*/ true); + RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, + /*loadElts*/ true); + DeadInsts.push_back(CI); + return; } - } break; - case IntrinsicOp::IOP_ReportHit: { - RewriteCallArg(CI, HLOperandIndex::kReportIntersectionAttributeOpIdx, - /*bIn*/ true, /*bOut*/ false); - } break; - case IntrinsicOp::IOP_CallShader: { - RewriteCallArg(CI, HLOperandIndex::kCallShaderPayloadOpIdx, - /*bIn*/ true, /*bOut*/ true); - } break; - case IntrinsicOp::MOP_DxHitObject_MakeMiss: { - if (OldVal == - CI->getArgOperand(HLOperandIndex::kHitObjectMakeMissRayDescOpIdx)) { + break; + case IntrinsicOp::MOP_DxHitObject_TraceRay: + if (OldVal == CI->getArgOperand( + HLOperandIndex::kHitObjectTraceRay_RayDescOpIdx)) { RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ true); DeadInsts.push_back(CI); + return; } - } break; - case IntrinsicOp::MOP_TraceRayInline: { - if (OldVal == - CI->getArgOperand(HLOperandIndex::kTraceRayInlineRayDescOpIdx)) { + break; + case IntrinsicOp::MOP_DxHitObject_MakeMiss: + if (OldVal == CI->getArgOperand( + HLOperandIndex::kHitObjectMakeMiss_RayDescOpIdx)) { RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ true); DeadInsts.push_back(CI); - break; + return; } - } - LLVM_FALLTHROUGH; - default: - // RayQuery this pointer replacement. - if (OldVal->getType()->isPointerTy() && - CI->getNumArgOperands() >= HLOperandIndex::kHandleOpIdx && - OldVal == CI->getArgOperand(HLOperandIndex::kHandleOpIdx) && - dxilutil::IsHLSLRayQueryType( - OldVal->getType()->getPointerElementType())) { - // For RayQuery methods, we want to replace the RayQuery this pointer - // with a load and use of the underlying handle value. - // This will allow elimination of RayQuery types earlier. + break; + case IntrinsicOp::MOP_TraceRayInline: + if (OldVal == + CI->getArgOperand(HLOperandIndex::kTraceRayInlineRayDescOpIdx)) { RewriteWithFlattenedHLIntrinsicCall(CI, OldVal, NewElts, /*loadElts*/ true); DeadInsts.push_back(CI); - break; + return; } - DXASSERT(0, "cannot flatten hlsl intrinsic."); + break; + default: + break; } + DXASSERT(0, "cannot flatten hlsl intrinsic."); } // TODO: check other high level dx operations if need to. } else { @@ -4390,6 +4491,9 @@ class SROA_Parameter_HLSL : public ModulePass { F->eraseFromParent(); } + // Expand flattened copy-in/copy-out for intrinsic UDT args: + copyIntrinsicAggArgs(*m_pHLModule); + // SROA globals and allocas. 
SROAGlobalAndAllocas(*m_pHLModule, m_HasDbgInfo); diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index 69ca2688c8..d8e8fa11bd 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -4472,8 +4472,8 @@ LoopVectorizationCostModel::selectVectorizationFactor(bool OptForSize) { unsigned WidestType = getWidestType(); unsigned WidestRegister = TTI.getRegisterBitWidth(true); - unsigned MaxSafeDepDist = -1U; - if (Legal->getMaxSafeDepDistBytes() != -1U) + unsigned MaxSafeDepDist = std::numeric_limits::max(); + if (Legal->getMaxSafeDepDistBytes() != std::numeric_limits::max()) MaxSafeDepDist = Legal->getMaxSafeDepDistBytes() * 8; WidestRegister = ((WidestRegister < MaxSafeDepDist) ? WidestRegister : MaxSafeDepDist); @@ -4638,7 +4638,7 @@ unsigned LoopVectorizationCostModel::selectInterleaveCount(bool OptForSize, return 1; // We used the distance for the interleave count. - if (Legal->getMaxSafeDepDistBytes() != -1U) + if (Legal->getMaxSafeDepDistBytes() != std::numeric_limits::max()) return 1; // Do not interleave loops with a relatively small trip count. diff --git a/tools/clang/include/clang/AST/DeclCXX.h b/tools/clang/include/clang/AST/DeclCXX.h index 36e0f99c82..3b07576545 100644 --- a/tools/clang/include/clang/AST/DeclCXX.h +++ b/tools/clang/include/clang/AST/DeclCXX.h @@ -465,10 +465,6 @@ class CXXRecordDecl : public RecordDecl { /// \brief Whether we are currently parsing base specifiers. bool IsParsingBaseSpecifiers : 1; - /// \brief Whether this class contains at least one member or base - /// class containing an HLSL vector longer than 4 elements. - bool HasHLSLLongVector : 1; - /// \brief The number of base class specifiers in Bases. unsigned NumBases; @@ -1022,13 +1018,6 @@ class CXXRecordDecl : public RecordDecl { return data().NeedOverloadResolutionForDestructor; } - // HLSL Change add HLSL Long vector bit. - /// \brief Determine whether this class contains an HLSL long vector - /// of over 4 elements. - bool hasHLSLLongVector() { return data().HasHLSLLongVector; } - /// \brief Set that this class contains an HLSL long vector of over 4 elements - bool setHasHLSLLongVector() { return data().HasHLSLLongVector = true; } - /// \brief Determine whether this class describes a lambda function object. bool isLambda() const { // An update record can't turn a non-lambda into a lambda. diff --git a/tools/clang/include/clang/AST/Expr.h b/tools/clang/include/clang/AST/Expr.h index 26eff309f7..55fd184a79 100644 --- a/tools/clang/include/clang/AST/Expr.h +++ b/tools/clang/include/clang/AST/Expr.h @@ -4510,7 +4510,9 @@ class GenericSelectionExpr : public Expr { Expr *getControllingExpr() { return cast(SubExprs[CONTROLLING]); } /// Whether this generic selection is result-dependent. - bool isResultDependent() const { return ResultIndex == -1U; } + bool isResultDependent() const { + return ResultIndex == std::numeric_limits::max(); + } /// The zero-based index of the result expression's generic association in /// the generic selection's association list. Defined only if the diff --git a/tools/clang/include/clang/AST/HlslTypes.h b/tools/clang/include/clang/AST/HlslTypes.h index 3a02824b3a..43c1effdb8 100644 --- a/tools/clang/include/clang/AST/HlslTypes.h +++ b/tools/clang/include/clang/AST/HlslTypes.h @@ -6,9 +6,6 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. 
// // // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // -// All rights reserved. // -// // /// /// \file // /// \brief Defines the HLSL type system interface. // @@ -488,17 +485,21 @@ bool IsHLSLObjectWithImplicitMemberAccess(clang::QualType type); bool IsHLSLObjectWithImplicitROMemberAccess(clang::QualType type); bool IsHLSLRWNodeInputRecordType(clang::QualType type); bool IsHLSLRONodeInputRecordType(clang::QualType type); +bool IsHLSLDispatchNodeInputRecordType(clang::QualType type); +bool IsHLSLNodeRecordArrayType(clang::QualType type); bool IsHLSLNodeOutputType(clang::QualType type); +bool IsHLSLEmptyNodeRecordType(clang::QualType type); DXIL::NodeIOKind GetNodeIOType(clang::QualType type); bool IsHLSLStructuredBufferType(clang::QualType type); bool IsHLSLNumericOrAggregateOfNumericType(clang::QualType type); -bool IsHLSLNumericUserDefinedType(clang::QualType type); bool IsHLSLCopyableAnnotatableRecord(clang::QualType QT); bool IsHLSLBuiltinRayAttributeStruct(clang::QualType QT); bool IsHLSLAggregateType(clang::QualType type); clang::QualType GetHLSLResourceResultType(clang::QualType type); +clang::QualType GetHLSLNodeIOResultType(clang::ASTContext &astContext, + clang::QualType type); unsigned GetHLSLResourceTemplateUInt(clang::QualType type); bool IsIncompleteHLSLResourceArrayType(clang::ASTContext &context, clang::QualType type); diff --git a/tools/clang/include/clang/AST/OperationKinds.h b/tools/clang/include/clang/AST/OperationKinds.h index 3909c8b5e8..d19082d699 100644 --- a/tools/clang/include/clang/AST/OperationKinds.h +++ b/tools/clang/include/clang/AST/OperationKinds.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// // // This file enumerates the different kinds of operations that can be diff --git a/tools/clang/include/clang/Basic/Attr.td b/tools/clang/include/clang/Basic/Attr.td index 2518423565..1797597d17 100644 --- a/tools/clang/include/clang/Basic/Attr.td +++ b/tools/clang/include/clang/Basic/Attr.td @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. 
-// //===----------------------------------------------------------------------===// class DocumentationCategory { @@ -1418,7 +1415,8 @@ def VKDecorateExt : InheritableAttr { def VKDecorateIdExt : InheritableAttr { let Spellings = [CXX11<"vk", "ext_decorate_id">]; - let Subjects = SubjectList<[Function, Var, ParmVar, TypedefName], ErrorDiag>; + let Subjects = + SubjectList<[Function, Var, ParmVar, Field, TypedefName], ErrorDiag>; let Args = [UnsignedArgument<"decorate">, VariadicExprArgument<"arguments">]; let LangOpts = [SPIRV]; let Documentation = [Undocumented]; @@ -1426,7 +1424,8 @@ def VKDecorateIdExt : InheritableAttr { def VKDecorateStringExt : InheritableAttr { let Spellings = [CXX11<"vk", "ext_decorate_string">]; - let Subjects = SubjectList<[Function, Var, ParmVar, TypedefName], ErrorDiag>; + let Subjects = + SubjectList<[Function, Var, ParmVar, Field, TypedefName], ErrorDiag>; let Args = [UnsignedArgument<"decorate">, VariadicStringArgument<"arguments">]; let LangOpts = [SPIRV]; let Documentation = [Undocumented]; diff --git a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td index 6254e5fc71..cbd9412566 100644 --- a/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td +++ b/tools/clang/include/clang/Basic/DiagnosticSemaKinds.td @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// //===----------------------------------------------------------------------===// @@ -7558,8 +7555,6 @@ def err_hlsl_missing_type_specifier : Error< // Patterened after err_missing_typ "HLSL requires a type specifier for all declarations">; def err_hlsl_multiple_concrete_bases : Error< "multiple concrete base types specified">; -def err_hlsl_objectintemplateargument : Error< - "%0 is an object and cannot be used as a type parameter">; def err_hlsl_packoffset_requires_cbuffer : Error< "packoffset is only allowed in a constant buffer">; def warn_hlsl_packoffset_mix : Warning< @@ -7646,7 +7641,7 @@ def err_payload_requires_inout : Error< def err_attributes_requiers_in : Error< "intersection attributes parameter %0 must be 'in'">; def err_payload_attrs_must_be_udt : Error< - "%select{payload|attributes|callable}0 parameter %1 must be a user-defined type composed of only numeric types">; + "%select{payload|attributes|callable}0 %select{parameter %2|type}1 must be a user-defined type composed of only numeric types">; def err_shader_must_return_void : Error< "return type for '%0' shaders must be void">; def err_raytracing_entry_param_count : Error< @@ -7885,7 +7880,16 @@ def err_hlsl_unsupported_long_vector "cbuffers or tbuffers|user-defined struct parameter|" "entry function parameters|entry function return type|" "patch constant function parameters|patch constant function return type|" - "payload parameters}0 are not supported">; + "payload parameters|attributes}0 are not supported">; +// First %select options must match err_hlsl_unsupported_long_vector (same index used) +def err_hlsl_unsupported_object_context + : Error<"object %0 is not allowed in " + "%select{ConstantBuffers or TextureBuffers|" + "tessellation patches|geometry streams|node records|" + "cbuffers or tbuffers|user-defined struct parameter|" + "entry function parameters|entry function return type|" + "patch constant function 
parameters|patch constant function return type|" + "payload parameters|attributes|builtin template parameters|structured buffers|global variables|groupshared variables}1">; def err_hlsl_logical_binop_scalar : Error< "operands for short-circuiting logical binary operator must be scalar, for non-scalar types use '%select{and|or}0'">; def err_hlsl_ternary_scalar : Error< @@ -7970,8 +7974,6 @@ def err_hlsl_too_many_node_inputs : Error< "Node shader '%0' may not have more than one input record">; def err_hlsl_node_record_type : Error< "%0 is not valid as a node record type - struct/class required">; -def err_hlsl_node_record_object : Error< - "object %0 may not appear in a node record">; def err_hlsl_array_disallowed : Error< "%select{entry parameter|declaration}1 of type %0 may not be an array">; def err_hlsl_inputpatch_size: Error< @@ -8013,6 +8015,43 @@ def err_hlsl_reorder_unsupported_stage : Error< "dx::MaybeReorderThread is unavailable in shader stage '%0' (requires 'raygeneration')">; def err_hlsl_hitobject_unsupported_stage : Error< "dx::HitObject is unavailable in shader stage '%0' (requires 'raygeneration', 'closesthit' or 'miss')">; + +// Linear Algebra Operations +def err_hlsl_linalg_isunsigned_incorrect_for_given_type : Error< + "%0 must be %select{false|true}1 for vector of " + "%select{floating point|signed integer|unsigned integer}2 type">; +def err_hlsl_linalg_interpretation_value_incorrect : Error< + "%0 is an invalid %select{memory|register}1 interpretation value">; +def err_hlsl_linalg_matrix_layout_is_not_transposable : Error< + "RowMajor and ColumnMajor matrices are not transposable">; +def err_hlsl_linalg_optimal_matrix_layout_matrix_stride_must_be_zero : Error< + "for optimal matrix layout, matrix stride must be 0">; +def err_hlsl_linalg_matrix_dim_must_be_greater_than_zero: Error< + "matrix dimension must be greater than 0">; +def err_hlsl_linalg_matrix_layout_invalid : Error< + "matrix layout %0 is not valid, must be in the range [%1, %2]">; + +def err_hlsl_linalg_mul_muladd_output_vector_size_not_equal_to_matrix_M : Error< + "output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation">; +def err_hlsl_linalg_mul_muladd_unpacked_input_vector_size_not_equal_to_matrix_K : Error< + "unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation">; +def err_hlsl_linalg_mul_muladd_packed_input_vector_size_incorrect : Error< + "packed input vector length must be the smallest number that can hold matrix dim K values of the " + "packed(smaller) type in linalg mul/muladd operations">; +def err_hlsl_linalg_mul_muladd_isUnsigned_for_packed_input_must_be_true : Error< + "IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations">; +def err_hlsl_linalg_mul_muladd_packed_input_vector_must_be_uint : Error< + "packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations">; +def err_hlsl_linalg_mul_muladd_invalid_dim: Error< + "matrix dimension %select{M|K when using unpacked input vectors|K " + "when using packed input vectors}0 must be less than %1, in a linalg " + "Mul/MulAdd operation">; + +def err_hlsl_linalg_outer_prod_acc_vector_type_mismatch : Error< + "input vectors of outerproductaccumulate must have the same element type">; +def err_hlsl_linalg_outer_prod_acc_matrix_layout_must_be_outer_prod_acc_optimal : Error< + "matrix layout for outerproductaccumulate must be %0">; + // HLSL Change Ends // SPIRV Change Starts @@ -8021,6 +8060,8 @@ def 
err_hlsl_vk_pointer_cast_alignment: Error< "Vulkan buffer pointer cannot be cast to greater alignment">; def err_hlsl_vk_static_pointer_cast_type: Error< "vk::static_pointer_cast() content type must be base class of argument's content type">; +def warn_spirv_node_shaders_experimental : Warning< + "SPIR-V implementation of node shaders is experimental and subject to change">; // SPIRV Change Ends let CategoryName = "OpenMP Issue" in { diff --git a/tools/clang/include/clang/SPIRV/AstTypeProbe.h b/tools/clang/include/clang/SPIRV/AstTypeProbe.h index 6302d43a88..9abea972c6 100644 --- a/tools/clang/include/clang/SPIRV/AstTypeProbe.h +++ b/tools/clang/include/clang/SPIRV/AstTypeProbe.h @@ -337,6 +337,10 @@ bool isOrContainsNonFpColMajorMatrix(const ASTContext &, const SpirvCodeGenOptions &, QualType type, const Decl *decl); +/// brief Returns true if the type is a boolean type or an aggragate type that +/// contains a boolean type. +bool isOrContainsBoolType(QualType type); + /// \brief Returns true if the given type is `vk::ext_result_id`. bool isExtResultIdType(QualType type); diff --git a/tools/clang/include/clang/SPIRV/FeatureManager.h b/tools/clang/include/clang/SPIRV/FeatureManager.h index 3c1871df37..94dc5bf1ab 100644 --- a/tools/clang/include/clang/SPIRV/FeatureManager.h +++ b/tools/clang/include/clang/SPIRV/FeatureManager.h @@ -57,6 +57,7 @@ enum class Extension { KHR_ray_query, EXT_shader_image_int64, KHR_physical_storage_buffer, + AMD_shader_enqueue, KHR_vulkan_memory_model, NV_compute_shader_derivatives, KHR_compute_shader_derivatives, diff --git a/tools/clang/include/clang/SPIRV/SpirvBuilder.h b/tools/clang/include/clang/SPIRV/SpirvBuilder.h index 5e03d1ef96..465f7313f1 100644 --- a/tools/clang/include/clang/SPIRV/SpirvBuilder.h +++ b/tools/clang/include/clang/SPIRV/SpirvBuilder.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVBUILDER_H #define LLVM_CLANG_SPIRV_SPIRVBUILDER_H @@ -437,6 +434,25 @@ class SpirvBuilder { QualType resultType, NonSemanticDebugPrintfInstructions instId, llvm::ArrayRef operands, SourceLocation); + SpirvInstruction *createIsNodePayloadValid(SpirvInstruction *payloadArray, + SpirvInstruction *nodeIndex, + SourceLocation); + + SpirvInstruction *createNodePayloadArrayLength(SpirvInstruction *payloadArray, + SourceLocation); + + SpirvInstruction *createAllocateNodePayloads(QualType resultType, + spv::Scope allocationScope, + SpirvInstruction *shaderIndex, + SpirvInstruction *recordCount, + SourceLocation); + + void createEnqueueOutputNodePayloads(SpirvInstruction *payload, + SourceLocation); + + SpirvInstruction *createFinishWritingNodePayload(SpirvInstruction *payload, + SourceLocation); + /// \brief Creates an OpMemoryBarrier or OpControlBarrier instruction with the /// given flags. If execution scope (exec) is provided, an OpControlBarrier /// is created; otherwise an OpMemoryBarrier is created. @@ -615,8 +631,15 @@ class SpirvBuilder { inline SpirvInstruction *addExecutionMode(SpirvFunction *entryPoint, spv::ExecutionMode em, llvm::ArrayRef params, - SourceLocation, - bool useIdParams = false); + SourceLocation); + + /// \brief Adds an execution mode to the module under construction if it does + /// not already exist. 
Return the newly added instruction or the existing + /// instruction, if one already exists. + inline SpirvInstruction * + addExecutionModeId(SpirvFunction *entryPoint, spv::ExecutionMode em, + llvm::ArrayRef params, + SourceLocation loc); /// \brief Adds an OpModuleProcessed instruction to the module under /// construction. @@ -759,6 +782,7 @@ class SpirvBuilder { llvm::ArrayRef constituents, bool specConst = false); SpirvConstant *getConstantNull(QualType); + SpirvConstant *getConstantString(llvm::StringRef str, bool specConst = false); SpirvUndef *getUndef(QualType); SpirvString *createString(llvm::StringRef str); @@ -963,17 +987,44 @@ SpirvBuilder::setDebugSource(uint32_t major, uint32_t minor, SpirvInstruction * SpirvBuilder::addExecutionMode(SpirvFunction *entryPoint, spv::ExecutionMode em, llvm::ArrayRef params, - SourceLocation loc, bool useIdParams) { + SourceLocation loc) { SpirvExecutionMode *mode = nullptr; - SpirvExecutionMode *existingInstruction = + SpirvExecutionModeBase *existingInstruction = mod->findExecutionMode(entryPoint, em); if (!existingInstruction) { - mode = new (context) - SpirvExecutionMode(loc, entryPoint, em, params, useIdParams); + mode = new (context) SpirvExecutionMode(loc, entryPoint, em, params); + mod->addExecutionMode(mode); + } else { + // No execution mode can be used with both OpExecutionMode and + // OpExecutionModeId. If this assert is triggered, then either this + // `addExecutionModeId` should have been called with `em` or the existing + // instruction is wrong. + assert(existingInstruction->getKind() == + SpirvInstruction::IK_ExecutionMode); + mode = cast(existingInstruction); + } + + return mode; +} + +SpirvInstruction *SpirvBuilder::addExecutionModeId( + SpirvFunction *entryPoint, spv::ExecutionMode em, + llvm::ArrayRef params, SourceLocation loc) { + SpirvExecutionModeId *mode = nullptr; + SpirvExecutionModeBase *existingInstruction = + mod->findExecutionMode(entryPoint, em); + if (!existingInstruction) { + mode = new (context) SpirvExecutionModeId(loc, entryPoint, em, params); mod->addExecutionMode(mode); } else { - mode = existingInstruction; + // No execution mode can be used with both OpExecutionMode and + // OpExecutionModeId. If this assert is triggered, then either this + // `addExecutionMode` should have been called with `em` or the existing + // instruction is wrong. + assert(existingInstruction->getKind() == + SpirvInstruction::IK_ExecutionModeId); + mode = cast(existingInstruction); } return mode; diff --git a/tools/clang/include/clang/SPIRV/SpirvContext.h b/tools/clang/include/clang/SPIRV/SpirvContext.h index c18c139642..8e0458e731 100644 --- a/tools/clang/include/clang/SPIRV/SpirvContext.h +++ b/tools/clang/include/clang/SPIRV/SpirvContext.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVCONTEXT_H #define LLVM_CLANG_SPIRV_SPIRVCONTEXT_H @@ -101,6 +98,21 @@ struct RuntimeArrayTypeMapInfo { } }; +// Provides DenseMapInfo for NodePayloadArrayType so we can create a DenseSet of +// node payload array types. 
+struct NodePayloadArrayTypeMapInfo { + static inline NodePayloadArrayType *getEmptyKey() { return nullptr; } + static inline NodePayloadArrayType *getTombstoneKey() { return nullptr; } + static unsigned getHashValue(const NodePayloadArrayType *Val) { + return llvm::hash_combine(Val->getElementType(), Val->getNodeDecl()); + } + static bool isEqual(const NodePayloadArrayType *LHS, + const NodePayloadArrayType *RHS) { + // Either both are null, or both should have the same underlying type. + return (LHS == RHS) || (LHS && RHS && *LHS == *RHS); + } +}; + // Provides DenseMapInfo for ImageType so we can create a DenseSet of // image types. struct ImageTypeMapInfo { @@ -273,6 +285,9 @@ class SpirvContext { const RuntimeArrayType * getRuntimeArrayType(const SpirvType *elemType, llvm::Optional arrayStride); + const NodePayloadArrayType * + getNodePayloadArrayType(const SpirvType *elemType, + const ParmVarDecl *nodeDecl); const StructType *getStructType( llvm::ArrayRef fields, llvm::StringRef name, @@ -349,6 +364,7 @@ class SpirvContext { bool isDS() const { return curShaderModelKind == ShaderModelKind::Domain; } bool isCS() const { return curShaderModelKind == ShaderModelKind::Compute; } bool isLib() const { return curShaderModelKind == ShaderModelKind::Library; } + bool isNode() const { return curShaderModelKind == ShaderModelKind::Node; } bool isRay() const { return curShaderModelKind >= ShaderModelKind::RayGeneration && curShaderModelKind <= ShaderModelKind::Callable; @@ -440,6 +456,31 @@ class SpirvContext { instructionsWithLoweredType.end(); } + void registerDispatchGridIndex(const RecordDecl *decl, unsigned index) { + auto iter = dispatchGridIndices.find(decl); + if (iter == dispatchGridIndices.end()) { + dispatchGridIndices[decl] = index; + } + } + + llvm::Optional getDispatchGridIndex(const RecordDecl *decl) { + auto iter = dispatchGridIndices.find(decl); + if (iter != dispatchGridIndices.end()) { + return iter->second; + } + return llvm::None; + } + + void registerNodeDeclPayloadType(const NodePayloadArrayType *type, + const ParmVarDecl *decl) { + nodeDecls[decl] = type; + } + + const NodePayloadArrayType *getNodeDeclPayloadType(const ParmVarDecl *decl) { + auto iter = nodeDecls.find(decl); + return iter == nodeDecls.end() ? nullptr : iter->second; + } + private: /// \brief The allocator used to create SPIR-V entity objects. /// @@ -484,6 +525,8 @@ class SpirvContext { llvm::DenseSet arrayTypes; llvm::DenseSet runtimeArrayTypes; + llvm::DenseSet + nodePayloadArrayTypes; llvm::SmallVector structTypes; llvm::SmallVector hybridStructTypes; llvm::DenseMap pointerTypes; @@ -510,6 +553,9 @@ class SpirvContext { llvm::StringMap debugInfo; SpirvDebugInstruction *currentLexicalScope; + // Mapping from graphics node input record types to member decoration maps. + llvm::MapVector dispatchGridIndices; + // Mapping from SPIR-V type to debug type instruction. // The purpose is not to generate several DebugType* instructions for the same // type if the type is used for several variables. @@ -541,6 +587,10 @@ class SpirvContext { // Set of instructions that already have lowered SPIR-V types. llvm::DenseSet instructionsWithLoweredType; + + // Mapping from shader entry function parameter declaration to node payload + // array type. 
+ llvm::MapVector nodeDecls; }; } // end namespace spirv diff --git a/tools/clang/include/clang/SPIRV/SpirvInstruction.h b/tools/clang/include/clang/SPIRV/SpirvInstruction.h index f49a295610..52f4128a6c 100644 --- a/tools/clang/include/clang/SPIRV/SpirvInstruction.h +++ b/tools/clang/include/clang/SPIRV/SpirvInstruction.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H #define LLVM_CLANG_SPIRV_SPIRVINSTRUCTION_H @@ -57,6 +54,7 @@ class SpirvInstruction { IK_MemoryModel, // OpMemoryModel IK_EntryPoint, // OpEntryPoint IK_ExecutionMode, // OpExecutionMode + IK_ExecutionModeId, // OpExecutionModeId IK_String, // OpString (debug) IK_Source, // OpSource (debug) IK_ModuleProcessed, // OpModuleProcessed (debug) @@ -69,6 +67,7 @@ class SpirvInstruction { IK_ConstantInteger, IK_ConstantFloat, IK_ConstantComposite, + IK_ConstantString, IK_ConstantNull, // Pointer <-> uint conversions. @@ -167,6 +166,13 @@ class SpirvInstruction { IK_DebugTypeMember, IK_DebugTypeTemplate, IK_DebugTypeTemplateParameter, + + // For workgraph instructions + IK_IsNodePayloadValid, + IK_NodePayloadArrayLength, + IK_AllocateNodePayloads, + IK_EnqueueNodePayloads, + IK_FinishWritingNodePayload, }; // All instruction classes should include a releaseMemory method. @@ -404,12 +410,34 @@ class SpirvEntryPoint : public SpirvInstruction { llvm::SmallVector interfaceVec; }; +class SpirvExecutionModeBase : public SpirvInstruction { +public: + SpirvExecutionModeBase(Kind kind, spv::Op opcode, SourceLocation loc, + SpirvFunction *entryPointFunction, + spv::ExecutionMode executionMode) + : SpirvInstruction(kind, opcode, QualType(), loc), + entryPoint(entryPointFunction), execMode(executionMode) {} + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvExecutionModeBase) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { return false; } + + bool invokeVisitor(Visitor *v) override; + + SpirvFunction *getEntryPoint() const { return entryPoint; } + spv::ExecutionMode getExecutionMode() const { return execMode; } + +private: + SpirvFunction *entryPoint; + spv::ExecutionMode execMode; +}; + /// \brief OpExecutionMode and OpExecutionModeId instructions -class SpirvExecutionMode : public SpirvInstruction { +class SpirvExecutionMode : public SpirvExecutionModeBase { public: SpirvExecutionMode(SourceLocation loc, SpirvFunction *entryPointFunction, - spv::ExecutionMode, llvm::ArrayRef params, - bool usesIdParams); + spv::ExecutionMode, llvm::ArrayRef params); DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvExecutionMode) @@ -430,6 +458,28 @@ class SpirvExecutionMode : public SpirvInstruction { llvm::SmallVector params; }; +/// \brief OpExecutionModeId +class SpirvExecutionModeId : public SpirvExecutionModeBase { +public: + SpirvExecutionModeId(SourceLocation loc, SpirvFunction *entryPointFunction, + spv::ExecutionMode em, + llvm::ArrayRef params); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvExecutionModeId) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ExecutionModeId; + } + + bool invokeVisitor(Visitor *v) override; + + llvm::ArrayRef getParams() const { return params; } + +private: + llvm::SmallVector params; +}; + /// \brief OpString instruction class SpirvString : public 
SpirvInstruction { public: @@ -1018,6 +1068,119 @@ class SpirvBarrier : public SpirvInstruction { llvm::Optional executionScope; }; +/// \brief OpIsNodePayloadValidAMDX instruction +class SpirvIsNodePayloadValid : public SpirvInstruction { +public: + SpirvIsNodePayloadValid(QualType resultType, SourceLocation loc, + SpirvInstruction *payloadArray, + SpirvInstruction *nodeIndex); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvIsNodePayloadValid) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_IsNodePayloadValid; + } + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getPayloadArray() { return payloadArray; } + SpirvInstruction *getNodeIndex() { return nodeIndex; } + +private: + SpirvInstruction *payloadArray; + SpirvInstruction *nodeIndex; +}; + +/// \brief OpNodePayloadArrayLengthAMDX instruction +class SpirvNodePayloadArrayLength : public SpirvInstruction { +public: + SpirvNodePayloadArrayLength(QualType resultType, SourceLocation loc, + SpirvInstruction *payloadArray); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvNodePayloadArrayLength) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_NodePayloadArrayLength; + } + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getPayloadArray() { return payloadArray; } + +private: + SpirvInstruction *payloadArray; +}; + +/// \brief OpAllocateNodePayloadsAMDX instruction +class SpirvAllocateNodePayloads : public SpirvInstruction { +public: + SpirvAllocateNodePayloads(QualType resultType, SourceLocation loc, + spv::Scope allocationScope, + SpirvInstruction *shaderIndex, + SpirvInstruction *recordCount); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvAllocateNodePayloads) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_AllocateNodePayloads; + } + + bool invokeVisitor(Visitor *v) override; + + spv::Scope getAllocationScope() { return allocationScope; } + SpirvInstruction *getShaderIndex() { return shaderIndex; } + SpirvInstruction *getRecordCount() { return recordCount; } + +private: + spv::Scope allocationScope; + SpirvInstruction *shaderIndex; + SpirvInstruction *recordCount; +}; + +/// \brief OpReleaseOutputNodePayloadAMDX instruction +class SpirvEnqueueNodePayloads : public SpirvInstruction { +public: + SpirvEnqueueNodePayloads(SourceLocation loc, SpirvInstruction *payload); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvEnqueueNodePayloads) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_EnqueueNodePayloads; + } + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getPayload() { return payload; } + +private: + SpirvInstruction *payload; +}; + +/// \brief OpFinishWritingNodePayloadAMDX instruction +class SpirvFinishWritingNodePayload : public SpirvInstruction { +public: + SpirvFinishWritingNodePayload(QualType resultType, SourceLocation loc, + SpirvInstruction *payload); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvFinishWritingNodePayload) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_FinishWritingNodePayload; + } + + bool invokeVisitor(Visitor *v) override; + + SpirvInstruction *getPayload() { return payload; } + +private: + SpirvInstruction *payload; +}; + /// \brief Represents SPIR-V binary operation instructions. 
/// /// This class includes: @@ -1314,6 +1477,27 @@ class SpirvConstantNull : public SpirvConstant { bool operator==(const SpirvConstantNull &that) const; }; +class SpirvConstantString : public SpirvConstant { +public: + SpirvConstantString(llvm::StringRef stringLiteral, bool isSpecConst = false); + + DEFINE_RELEASE_MEMORY_FOR_CLASS(SpirvConstantString) + + // For LLVM-style RTTI + static bool classof(const SpirvInstruction *inst) { + return inst->getKind() == IK_ConstantString; + } + + bool invokeVisitor(Visitor *v) override; + + bool operator==(const SpirvConstantString &that) const; + + llvm::StringRef getString() const { return str; } + +private: + std::string str; +}; + class SpirvConvertPtrToU : public SpirvInstruction { public: SpirvConvertPtrToU(SpirvInstruction *ptr, QualType type, diff --git a/tools/clang/include/clang/SPIRV/SpirvModule.h b/tools/clang/include/clang/SPIRV/SpirvModule.h index 298c06d65e..9ab0c296b8 100644 --- a/tools/clang/include/clang/SPIRV/SpirvModule.h +++ b/tools/clang/include/clang/SPIRV/SpirvModule.h @@ -119,11 +119,11 @@ class SpirvModule { // Returns an existing execution mode instruction that is the same as em if it // exists. Return nullptr otherwise. - SpirvExecutionMode *findExecutionMode(SpirvFunction *entryPoint, - spv::ExecutionMode em); + SpirvExecutionModeBase *findExecutionMode(SpirvFunction *entryPoint, + spv::ExecutionMode em); // Adds an execution mode to the module. - void addExecutionMode(SpirvExecutionMode *); + void addExecutionMode(SpirvExecutionModeBase *em); // Adds an extension to the module. Returns true if the extension was added. // Returns false otherwise (e.g. if the extension already existed). @@ -194,7 +194,7 @@ class SpirvModule { llvm::SmallVector extInstSets; SpirvMemoryModel *memoryModel; llvm::SmallVector entryPoints; - llvm::SmallVector executionModes; + llvm::SmallVector executionModes; llvm::SmallVector constStrings; std::vector sources; std::vector moduleProcesses; diff --git a/tools/clang/include/clang/SPIRV/SpirvType.h b/tools/clang/include/clang/SPIRV/SpirvType.h index 00a00ef238..7966e3e0de 100644 --- a/tools/clang/include/clang/SPIRV/SpirvType.h +++ b/tools/clang/include/clang/SPIRV/SpirvType.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVTYPE_H #define LLVM_CLANG_SPIRV_SPIRVTYPE_H @@ -54,6 +51,7 @@ class SpirvType { TK_SampledImage, TK_Array, TK_RuntimeArray, + TK_NodePayloadArrayAMD, TK_Struct, TK_Pointer, TK_ForwardPointer, @@ -294,6 +292,26 @@ class RuntimeArrayType : public SpirvType { llvm::Optional stride; }; +class NodePayloadArrayType : public SpirvType { +public: + NodePayloadArrayType(const SpirvType *elemType, const ParmVarDecl *decl) + : SpirvType(TK_NodePayloadArrayAMD), elementType(elemType), + nodeDecl(decl) {} + + static bool classof(const SpirvType *t) { + return t->getKind() == TK_NodePayloadArrayAMD; + } + + bool operator==(const NodePayloadArrayType &that) const; + + const SpirvType *getElementType() const { return elementType; } + const ParmVarDecl *getNodeDecl() const { return nodeDecl; } + +private: + const SpirvType *elementType; + const ParmVarDecl *nodeDecl; +}; + // The StructType is the lowered type that best represents what a structure type // is in SPIR-V. 
Contains all necessary information for properly emitting a // SPIR-V structure type. @@ -630,6 +648,8 @@ bool SpirvType::isOrContainsType(const SpirvType *type) { return isOrContainsType(pointerType->getPointeeType()); if (const auto *raType = dyn_cast(type)) return isOrContainsType(raType->getElementType()); + if (const auto *npaType = dyn_cast(type)) + return isOrContainsType(npaType->getElementType()); if (const auto *imgType = dyn_cast(type)) return isOrContainsType(imgType->getSampledType()); if (const auto *sampledImageType = dyn_cast(type)) diff --git a/tools/clang/include/clang/SPIRV/SpirvVisitor.h b/tools/clang/include/clang/SPIRV/SpirvVisitor.h index 93682518a1..a6de26c807 100644 --- a/tools/clang/include/clang/SPIRV/SpirvVisitor.h +++ b/tools/clang/include/clang/SPIRV/SpirvVisitor.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_SPIRVVISITOR_H #define LLVM_CLANG_SPIRV_SPIRVVISITOR_H @@ -64,7 +61,7 @@ class Visitor { DEFINE_VISIT_METHOD(SpirvExtInstImport) DEFINE_VISIT_METHOD(SpirvMemoryModel) DEFINE_VISIT_METHOD(SpirvEntryPoint) - DEFINE_VISIT_METHOD(SpirvExecutionMode) + DEFINE_VISIT_METHOD(SpirvExecutionModeBase) DEFINE_VISIT_METHOD(SpirvString) DEFINE_VISIT_METHOD(SpirvSource) DEFINE_VISIT_METHOD(SpirvModuleProcessed) @@ -85,6 +82,11 @@ class Visitor { DEFINE_VISIT_METHOD(SpirvAccessChain) DEFINE_VISIT_METHOD(SpirvAtomic) DEFINE_VISIT_METHOD(SpirvBarrier) + DEFINE_VISIT_METHOD(SpirvIsNodePayloadValid) + DEFINE_VISIT_METHOD(SpirvNodePayloadArrayLength) + DEFINE_VISIT_METHOD(SpirvAllocateNodePayloads) + DEFINE_VISIT_METHOD(SpirvEnqueueNodePayloads) + DEFINE_VISIT_METHOD(SpirvFinishWritingNodePayload) DEFINE_VISIT_METHOD(SpirvBinaryOp) DEFINE_VISIT_METHOD(SpirvBitFieldExtract) DEFINE_VISIT_METHOD(SpirvBitFieldInsert) @@ -92,6 +94,7 @@ class Visitor { DEFINE_VISIT_METHOD(SpirvConstantInteger) DEFINE_VISIT_METHOD(SpirvConstantFloat) DEFINE_VISIT_METHOD(SpirvConstantComposite) + DEFINE_VISIT_METHOD(SpirvConstantString) DEFINE_VISIT_METHOD(SpirvConstantNull) DEFINE_VISIT_METHOD(SpirvConvertPtrToU) DEFINE_VISIT_METHOD(SpirvConvertUToPtr) diff --git a/tools/clang/include/clang/Sema/ExternalSemaSource.h b/tools/clang/include/clang/Sema/ExternalSemaSource.h index 91578e2440..b10d649cc6 100644 --- a/tools/clang/include/clang/Sema/ExternalSemaSource.h +++ b/tools/clang/include/clang/Sema/ExternalSemaSource.h @@ -211,10 +211,9 @@ class ExternalSemaSource : public ExternalASTSource { // add call candidates to the given expression. It returns 'true' // if standard overload search should be suppressed; false otherwise. virtual bool AddOverloadedCallCandidates(UnresolvedLookupExpr *ULE, - ArrayRef Args, - OverloadCandidateSet &CandidateSet, - bool PartialOverloading) - { + ArrayRef Args, + OverloadCandidateSet &CandidateSet, + Scope *S, bool PartialOverloading) { return false; } diff --git a/tools/clang/include/clang/Sema/Overload.h b/tools/clang/include/clang/Sema/Overload.h index 89de4ce984..473af49cab 100644 --- a/tools/clang/include/clang/Sema/Overload.h +++ b/tools/clang/include/clang/Sema/Overload.h @@ -57,7 +57,7 @@ namespace clang { /// convert an argument to a parameter's type. 
The enumerator values /// match with Table 9 of (C++ 13.3.3.1.1) and are listed such that /// better conversion kinds have smaller values. - enum ImplicitConversionKind { + enum ImplicitConversionKind : unsigned int { ICK_Identity = 0, ///< Identity conversion (no conversion) ICK_Lvalue_To_Rvalue, ///< Lvalue-to-rvalue conversion (C++ 4.1) ICK_Array_To_Pointer, ///< Array-to-pointer conversion (C++ 4.2) @@ -79,27 +79,28 @@ namespace clang { ICK_Vector_Conversion, ///< Vector conversions ICK_Vector_Splat, ///< A vector splat from an arithmetic type ICK_Complex_Real, ///< Complex-real conversions (C99 6.3.1.7) - ICK_Block_Pointer_Conversion, ///< Block Pointer conversions + ICK_Block_Pointer_Conversion, ///< Block Pointer conversions ICK_TransparentUnionConversion, ///< Transparent Union Conversions - ICK_Writeback_Conversion, ///< Objective-C ARC writeback conversion + ICK_Writeback_Conversion, ///< Objective-C ARC writeback conversion ICK_Zero_Event_Conversion, ///< Zero constant to event (OpenCL1.2 6.12.10) // HLSL Change Starts - // The following conversion types also imply a potential followup + // The following conversion types also imply a potential followup // ComponentConversion. // List is roughly ordered to preserve the property: // "better conversion kinds have smaller values" - // Unfortunately, this property isn't really possible to preserve due + // Unfortunately, this property isn't really possible to preserve due // to potential additional component conversion. ICK_HLSLVector_Scalar, ///< HLSLVector/Matrix to scalar ICK_HLSLVector_Conversion, ///< HLSLVector/Matrix conversion - ICK_Flat_Conversion, ///< Flat assignment conversion for HLSL (inline conversion, straddled) + ICK_Flat_Conversion, ///< Flat assignment conversion for HLSL (inline + ///< conversion, straddled) ICK_HLSLVector_Splat, ///< HLSLVector/Matrix splat ICK_HLSLVector_Truncation, ///< HLSLVector/Matrix truncation ICK_HLSL_Derived_To_Base, ///< HLSL Derived-to-base // HLSL Change Ends - ICK_Num_Conversion_Kinds ///< The number of conversion kinds + ICK_Num_Conversion_Kinds ///< The number of conversion kinds }; /// ImplicitConversionRank - The rank of an implicit conversion diff --git a/tools/clang/include/clang/Sema/Sema.h b/tools/clang/include/clang/Sema/Sema.h index 755c7e0755..6eb0aba801 100644 --- a/tools/clang/include/clang/Sema/Sema.h +++ b/tools/clang/include/clang/Sema/Sema.h @@ -2495,9 +2495,14 @@ class Sema { DeclAccessPair FoundDecl, FunctionDecl *Fn); + // HLSL Change Begin + void CollectNamespaceContexts(Scope *, + SmallVectorImpl &); + // HLSL Change End void AddOverloadedCallCandidates(UnresolvedLookupExpr *ULE, ArrayRef Args, OverloadCandidateSet &CandidateSet, + Scope *S, // HLSL Change bool PartialOverloading = false); // An enum used to represent the different possible results of building a @@ -3806,8 +3811,7 @@ class Sema { void DiagnoseHLSLDeclAttr(const Decl *D, const Attr *A); void DiagnoseCoherenceMismatch(const Expr *SrcExpr, QualType TargetType, SourceLocation Loc); - void CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, - const FunctionProtoType *Proto); + void CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall); void DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, hlsl::DXIL::ShaderKind EntrySK, hlsl::DXIL::NodeLaunchType NodeLaunchTy, @@ -8826,8 +8830,6 @@ class Sema { bool AllowOnePastEnd=true, bool IndexNegated=false); // HLSL Change Starts - checking array subscript access to vector or matrix member void CheckHLSLArrayAccess(const 
Expr *expr); - bool CheckHLSLIntrinsicCall(FunctionDecl *FDecl, CallExpr *TheCall); - bool CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall); // HLSL Change ends void CheckArrayAccess(const Expr *E); // Used to grab the relevant information from a FormatAttr and a diff --git a/tools/clang/include/clang/Sema/SemaHLSL.h b/tools/clang/include/clang/Sema/SemaHLSL.h index 59d99ab4c5..80ce8ddd7d 100644 --- a/tools/clang/include/clang/Sema/SemaHLSL.h +++ b/tools/clang/include/clang/Sema/SemaHLSL.h @@ -59,6 +59,38 @@ bool DiagnoseNodeStructArgument(clang::Sema *self, clang::QualType ArgTy, bool &Empty, const clang::FieldDecl *FD = nullptr); +// Keep this in sync with err_hlsl_unsupported_object in DiagnosticSemaKinds.td +enum class TypeDiagContext { + // Indices that the type context is valid and no diagnostics should be emitted + // for this type category. + Valid = -1, + // Supported indices for both `err_hlsl_unsupported_object_context` and + // `err_hlsl_unsupported_long_vector` + ConstantBuffersOrTextureBuffers = 0, + TessellationPatches = 1, + GeometryStreams = 2, + NodeRecords = 3, + CBuffersOrTBuffers = 4, + UserDefinedStructParameter = 5, + EntryFunctionParameters = 6, + EntryFunctionReturnType = 7, + PatchConstantFunctionParameters = 8, + PatchConstantFunctionReturnType = 9, + PayloadParameters = 10, + Attributes = 11, + TypeParameter = 12, + LongVecDiagMaxSelectIndex = TypeParameter, + // Below only supported for `err_hlsl_diag_unsupported_object_context` + StructuredBuffers = 13, + GlobalVariables = 14, + GroupShared = 15, + DiagMaxSelectIndex = 15, +}; +bool DiagnoseTypeElements(clang::Sema &S, clang::SourceLocation Loc, + clang::QualType Ty, TypeDiagContext ObjDiagContext, + TypeDiagContext LongVecDiagContext, + const clang::FieldDecl *FD = nullptr); + void DiagnoseControlFlowConditionForHLSL(clang::Sema *self, clang::Expr *condExpr, llvm::StringRef StmtName); diff --git a/tools/clang/lib/AST/ASTContextHLSL.cpp b/tools/clang/lib/AST/ASTContextHLSL.cpp index 0a688c03fa..913b28ced8 100644 --- a/tools/clang/lib/AST/ASTContextHLSL.cpp +++ b/tools/clang/lib/AST/ASTContextHLSL.cpp @@ -6,9 +6,6 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // -// All rights reserved. // -// // // This file implements the ASTContext interface for HLSL. 
// // // /////////////////////////////////////////////////////////////////////////////// diff --git a/tools/clang/lib/AST/DeclCXX.cpp b/tools/clang/lib/AST/DeclCXX.cpp index baed44667f..8023a0a588 100644 --- a/tools/clang/lib/AST/DeclCXX.cpp +++ b/tools/clang/lib/AST/DeclCXX.cpp @@ -72,8 +72,8 @@ CXXRecordDecl::DefinitionData::DefinitionData(CXXRecordDecl *D) ImplicitCopyAssignmentHasConstParam(true), HasDeclaredCopyConstructorWithConstParam(false), HasDeclaredCopyAssignmentWithConstParam(false), IsLambda(false), - IsParsingBaseSpecifiers(false), HasHLSLLongVector(false), NumBases(0), - NumVBases(0), Bases(), VBases(), Definition(D), FirstFriend() {} + IsParsingBaseSpecifiers(false), NumBases(0), NumVBases(0), Bases(), + VBases(), Definition(D), FirstFriend() {} // HLSL Change End - Add HasLongVector and clang-format CXXBaseSpecifier *CXXRecordDecl::DefinitionData::getBasesSlowCase() const { @@ -203,11 +203,6 @@ CXXRecordDecl::setBases(CXXBaseSpecifier const * const *Bases, if (!BaseClassDecl->isStandardLayout()) data().IsStandardLayout = false; - // HLSL Change Begin - Propagate presence of long vector to child classes. - if (BaseClassDecl->hasHLSLLongVector()) - data().HasHLSLLongVector = true; - // HLSL Change End - // Record if this base is the first non-literal field or base. if (!hasNonLiteralTypeFieldsOrBases() && !BaseType->isLiteralType(C)) data().HasNonLiteralTypeFieldsOrBases = true; @@ -389,11 +384,6 @@ void CXXRecordDecl::addedClassSubobject(CXXRecordDecl *Subobj) { data().NeedOverloadResolutionForMoveConstructor = true; data().NeedOverloadResolutionForDestructor = true; } - - // HLSL Change Begin - Propagate presence of long vector to child classes. - if (Subobj->hasHLSLLongVector()) - data().HasHLSLLongVector = true; - // HLSL Change End } /// Callback function for CXXRecordDecl::forallBases that acknowledges diff --git a/tools/clang/lib/AST/Expr.cpp b/tools/clang/lib/AST/Expr.cpp index c6dc21217e..2d039a7e98 100644 --- a/tools/clang/lib/AST/Expr.cpp +++ b/tools/clang/lib/AST/Expr.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// // // This file implements the Expr class and subclasses. 
@@ -3886,25 +3883,21 @@ GenericSelectionExpr::GenericSelectionExpr(const ASTContext &Context, std::copy(AssocExprs.begin(), AssocExprs.end(), SubExprs+END_EXPR); } -GenericSelectionExpr::GenericSelectionExpr(const ASTContext &Context, - SourceLocation GenericLoc, Expr *ControllingExpr, - ArrayRef AssocTypes, - ArrayRef AssocExprs, - SourceLocation DefaultLoc, - SourceLocation RParenLoc, - bool ContainsUnexpandedParameterPack) - : Expr(GenericSelectionExprClass, - Context.DependentTy, - VK_RValue, - OK_Ordinary, - /*isTypeDependent=*/true, - /*isValueDependent=*/true, - /*isInstantiationDependent=*/true, - ContainsUnexpandedParameterPack), - AssocTypes(new (Context) TypeSourceInfo*[AssocTypes.size()]), - SubExprs(new (Context) Stmt*[END_EXPR+AssocExprs.size()]), - NumAssocs(AssocExprs.size()), ResultIndex(-1U), GenericLoc(GenericLoc), - DefaultLoc(DefaultLoc), RParenLoc(RParenLoc) { +GenericSelectionExpr::GenericSelectionExpr( + const ASTContext &Context, SourceLocation GenericLoc, Expr *ControllingExpr, + ArrayRef AssocTypes, ArrayRef AssocExprs, + SourceLocation DefaultLoc, SourceLocation RParenLoc, + bool ContainsUnexpandedParameterPack) + : Expr(GenericSelectionExprClass, Context.DependentTy, VK_RValue, + OK_Ordinary, + /*isTypeDependent=*/true, + /*isValueDependent=*/true, + /*isInstantiationDependent=*/true, ContainsUnexpandedParameterPack), + AssocTypes(new(Context) TypeSourceInfo *[AssocTypes.size()]), + SubExprs(new(Context) Stmt *[END_EXPR + AssocExprs.size()]), + NumAssocs(AssocExprs.size()), + ResultIndex(std::numeric_limits::max()), GenericLoc(GenericLoc), + DefaultLoc(DefaultLoc), RParenLoc(RParenLoc) { SubExprs[CONTROLLING] = ControllingExpr; assert(AssocTypes.size() == AssocExprs.size()); std::copy(AssocTypes.begin(), AssocTypes.end(), this->AssocTypes); diff --git a/tools/clang/lib/AST/ExprConstant.cpp b/tools/clang/lib/AST/ExprConstant.cpp index 69e0760bce..c24e44022f 100644 --- a/tools/clang/lib/AST/ExprConstant.cpp +++ b/tools/clang/lib/AST/ExprConstant.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// // // This file implements the Expr constant evaluator. @@ -6558,7 +6555,7 @@ bool IntExprEvaluator::VisitCallExpr(const CallExpr *E) { // handle all cases where the expression has side-effects. if (E->getArg(0)->HasSideEffects(Info.Ctx)) { if (E->getArg(1)->EvaluateKnownConstInt(Info.Ctx).getZExtValue() <= 1) - return Success(-1ULL, E); + return Success(~0ULL, E); return Success(0, E); } @@ -6573,7 +6570,7 @@ bool IntExprEvaluator::VisitCallExpr(const CallExpr *E) { return Error(E); case EvalInfo::EM_ConstantExpressionUnevaluated: case EvalInfo::EM_PotentialConstantExpressionUnevaluated: - return Success(-1ULL, E); + return Success(~0ULL, E); } llvm_unreachable("Invalid EvalMode!"); } diff --git a/tools/clang/lib/AST/HlslTypes.cpp b/tools/clang/lib/AST/HlslTypes.cpp index 5b19e064a3..00c18a81a9 100644 --- a/tools/clang/lib/AST/HlslTypes.cpp +++ b/tools/clang/lib/AST/HlslTypes.cpp @@ -5,9 +5,6 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // -// -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. 
// // /// /// \file // @@ -95,6 +92,8 @@ bool IsHLSLNumericOrAggregateOfNumericType(clang::QualType type) { } else if (type->isArrayType()) { return IsHLSLNumericOrAggregateOfNumericType( QualType(type->getArrayElementTypeNoTypeQual(), 0)); + } else if (type->isEnumeralType()) { + return true; } // Chars can only appear as part of strings, which we don't consider numeric. @@ -103,31 +102,34 @@ bool IsHLSLNumericOrAggregateOfNumericType(clang::QualType type) { BuiltinTy->getKind() != BuiltinType::Kind::Char_S; } -bool IsHLSLNumericUserDefinedType(clang::QualType type) { - const clang::Type *Ty = type.getCanonicalType().getTypePtr(); +// In some cases we need record types that are annotatable and trivially +// copyable from outside the shader. This excludes resource types which may be +// trivially copyable inside the shader, and builtin matrix and vector types +// which can't be annotated. But includes UDTs of trivially copyable data and +// the builtin trivially copyable raytracing structs. +bool IsHLSLCopyableAnnotatableRecord(clang::QualType QT) { + assert(!QT->isIncompleteType() && "Type must be complete!"); + const clang::Type *Ty = QT.getCanonicalType().getTypePtr(); if (const RecordType *RT = dyn_cast(Ty)) { const RecordDecl *RD = RT->getDecl(); - if (!IsUserDefinedRecordType(type)) + if (!IsUserDefinedRecordType(QT)) return false; - for (auto member : RD->fields()) { - if (!IsHLSLNumericOrAggregateOfNumericType(member->getType())) + for (auto Member : RD->fields()) { + if (!IsHLSLNumericOrAggregateOfNumericType(Member->getType())) return false; } + if (auto *CXXRD = dyn_cast(RD)) { + // Walk up the inheritance chain and check base class fields + for (const auto &Base : CXXRD->bases()) { + if (!IsHLSLCopyableAnnotatableRecord(Base.getType())) + return false; + } + } return true; } return false; } -// In some cases we need record types that are annotatable and trivially -// copyable from outside the shader. This excludes resource types which may be -// trivially copyable inside the shader, and builtin matrix and vector types -// which can't be annotated. But includes UDTs of trivially copyable data and -// the builtin trivially copyable raytracing structs. 
-bool IsHLSLCopyableAnnotatableRecord(clang::QualType QT) { - return IsHLSLNumericUserDefinedType(QT) || - IsHLSLBuiltinRayAttributeStruct(QT); -} - bool IsHLSLBuiltinRayAttributeStruct(clang::QualType QT) { QT = QT.getCanonicalType(); const clang::Type *Ty = QT.getTypePtr(); @@ -586,6 +588,12 @@ bool IsHLSLRONodeInputRecordType(clang::QualType type) { static_cast(DXIL::NodeIOFlags::Input); } +bool IsHLSLDispatchNodeInputRecordType(clang::QualType type) { + return IsHLSLNodeInputType(type) && + (static_cast(GetNodeIOType(type)) & + static_cast(DXIL::NodeIOFlags::DispatchRecord)) != 0; +} + bool IsHLSLNodeOutputType(clang::QualType type) { return (static_cast(GetNodeIOType(type)) & (static_cast(DXIL::NodeIOFlags::Output) | @@ -593,6 +601,23 @@ bool IsHLSLNodeOutputType(clang::QualType type) { static_cast(DXIL::NodeIOFlags::Output); } +bool IsHLSLNodeRecordArrayType(clang::QualType type) { + if (const RecordType *RT = type->getAs()) { + StringRef name = RT->getDecl()->getName(); + if (name == "ThreadNodeOutputRecords" || name == "GroupNodeOutputRecords" || + name == "GroupNodeInputRecords" || name == "RWGroupNodeInputRecords" || + name == "EmptyNodeInput") + return true; + } + return false; +} + +bool IsHLSLEmptyNodeRecordType(clang::QualType type) { + return (static_cast(GetNodeIOType(type)) & + static_cast(DXIL::NodeIOFlags::EmptyRecord)) == + static_cast(DXIL::NodeIOFlags::EmptyRecord); +} + bool IsHLSLStructuredBufferType(clang::QualType type) { if (const HLSLResourceAttr *Attr = getAttr(type)) return Attr->getResKind() == DXIL::ResourceKind::StructuredBuffer; @@ -609,7 +634,8 @@ bool IsUserDefinedRecordType(clang::QualType QT) { const clang::Type *Ty = QT.getCanonicalType().getTypePtr(); if (const RecordType *RT = dyn_cast(Ty)) { const RecordDecl *RD = RT->getDecl(); - if (RD->isImplicit()) + // Built-in ray tracing struct types are considered user defined types. 
+ if (RD->isImplicit() && !IsHLSLBuiltinRayAttributeStruct(QT)) return false; if (auto TD = dyn_cast(RD)) if (TD->getSpecializedTemplate()->isImplicit()) @@ -834,6 +860,23 @@ QualType GetHLSLResourceResultType(QualType type) { return HandleFieldDecl->getType(); } +QualType GetHLSLNodeIOResultType(ASTContext &astContext, QualType type) { + if (hlsl::IsHLSLEmptyNodeRecordType(type)) { + RecordDecl *RD = astContext.buildImplicitRecord(""); + RD->startDefinition(); + RD->completeDefinition(); + return astContext.getRecordType(RD); + } else if (hlsl::IsHLSLNodeType(type)) { + const RecordType *recordType = type->getAs(); + if (const auto *templateDecl = + dyn_cast(recordType->getDecl())) { + const auto &templateArgs = templateDecl->getTemplateArgs(); + return templateArgs[0].getAsType(); + } + } + return type; +} + unsigned GetHLSLResourceTemplateUInt(clang::QualType type) { const ClassTemplateSpecializationDecl *templateDecl = cast( diff --git a/tools/clang/lib/AST/MicrosoftMangle.cpp b/tools/clang/lib/AST/MicrosoftMangle.cpp index 40dca1bb1b..ae9f1cd7f8 100644 --- a/tools/clang/lib/AST/MicrosoftMangle.cpp +++ b/tools/clang/lib/AST/MicrosoftMangle.cpp @@ -633,7 +633,7 @@ void MicrosoftCXXNameMangler::mangleNumber(int64_t Number) { uint64_t Value = static_cast(Number); if (Number < 0) { - Value = -Value; + Value = ~Value + 1ULL; Out << '?'; } @@ -2308,7 +2308,7 @@ static void mangleThunkThisAdjustment(const CXXMethodDecl *MD, Out << AccessSpec; Mangler.mangleNumber( static_cast(Adjustment.Virtual.Microsoft.VtordispOffset)); - Mangler.mangleNumber(-static_cast(Adjustment.NonVirtual)); + Mangler.mangleNumber(~static_cast(Adjustment.NonVirtual) + 1); } } else if (Adjustment.NonVirtual != 0) { switch (MD->getAccess()) { @@ -2323,7 +2323,7 @@ static void mangleThunkThisAdjustment(const CXXMethodDecl *MD, case AS_public: Out << 'W'; } - Mangler.mangleNumber(-static_cast(Adjustment.NonVirtual)); + Mangler.mangleNumber(~static_cast(Adjustment.NonVirtual) + 1); } else { switch (MD->getAccess()) { case AS_none: diff --git a/tools/clang/lib/AST/SelectorLocationsKind.cpp b/tools/clang/lib/AST/SelectorLocationsKind.cpp index 671207a7f2..36fd8cea6e 100644 --- a/tools/clang/lib/AST/SelectorLocationsKind.cpp +++ b/tools/clang/lib/AST/SelectorLocationsKind.cpp @@ -28,7 +28,7 @@ static SourceLocation getStandardSelLoc(unsigned Index, if (EndLoc.isInvalid()) return SourceLocation(); IdentifierInfo *II = Sel.getIdentifierInfoForSlot(0); - unsigned Len = II ? II->getLength() : 0; + int Len = II ? II->getLength() : 0; return EndLoc.getLocWithOffset(-Len); } @@ -36,7 +36,7 @@ static SourceLocation getStandardSelLoc(unsigned Index, if (ArgLoc.isInvalid()) return SourceLocation(); IdentifierInfo *II = Sel.getIdentifierInfoForSlot(Index); - unsigned Len = /* selector id */ (II ? II->getLength() : 0) + /* ':' */ 1; + int Len = /* selector id */ (II ? 
II->getLength() : 0) + /* ':' */ 1; if (WithArgSpace) ++Len; return ArgLoc.getLocWithOffset(-Len); diff --git a/tools/clang/lib/CodeGen/CGExprScalar.cpp b/tools/clang/lib/CodeGen/CGExprScalar.cpp index 530c791fcc..50aae94505 100644 --- a/tools/clang/lib/CodeGen/CGExprScalar.cpp +++ b/tools/clang/lib/CodeGen/CGExprScalar.cpp @@ -2559,7 +2559,8 @@ void ScalarExprEmitter::EmitUndefinedBehaviorIntegerDivAndRemCheck( llvm::Value *IntMin = Builder.getInt(llvm::APInt::getSignedMinValue(Ty->getBitWidth())); - llvm::Value *NegOne = llvm::ConstantInt::get(Ty, -1ULL); + llvm::Value *NegOne = + llvm::ConstantInt::get(Ty, std::numeric_limits::max()); llvm::Value *LHSCmp = Builder.CreateICmpNE(Ops.LHS, IntMin); llvm::Value *RHSCmp = Builder.CreateICmpNE(Ops.RHS, NegOne); diff --git a/tools/clang/lib/CodeGen/CGHLSLMS.cpp b/tools/clang/lib/CodeGen/CGHLSLMS.cpp index 16ddeaec60..b5add521a6 100644 --- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp +++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp @@ -288,6 +288,9 @@ class CGMSHLSLRuntime : public CGHLSLRuntime { llvm::Value *DestPtr, clang::QualType DestTy) override; void AddHLSLFunctionInfo(llvm::Function *, const FunctionDecl *FD) override; + bool FindDispatchGridSemantic(const CXXRecordDecl *RD, + hlsl::SVDispatchGrid &SDGRec, + CharUnits Offset = CharUnits()); void AddHLSLNodeRecordTypeInfo(const clang::ParmVarDecl *parmDecl, hlsl::NodeIOProperties &node); void EmitHLSLFunctionProlog(llvm::Function *, @@ -2560,6 +2563,66 @@ void CGMSHLSLRuntime::AddHLSLFunctionInfo(Function *F, const FunctionDecl *FD) { m_ScopeMap[F] = ScopeInfo(F, FD->getLocation()); } +// Find the input node record field with the SV_DispatchGrid semantic. +// We have already diagnosed any error conditions in Sema, so we +// expect valid size and types, and use the first occurance found. +// We return true if we have populated the SV_DispatchGrid values. +bool CGMSHLSLRuntime::FindDispatchGridSemantic(const CXXRecordDecl *RD, + hlsl::SVDispatchGrid &SDGRec, + CharUnits Offset) { + const ASTRecordLayout &Layout = CGM.getContext().getASTRecordLayout(RD); + + // Check (non-virtual) bases + for (const CXXBaseSpecifier &Base : RD->bases()) { + DXASSERT(!Base.getType()->isDependentType(), + "Node Record with dependent base class not caught by Sema"); + if (Base.getType()->isDependentType()) + continue; + CXXRecordDecl *BaseDecl = Base.getType()->getAsCXXRecordDecl(); + CharUnits BaseOffset = Offset + Layout.getBaseClassOffset(BaseDecl); + if (FindDispatchGridSemantic(BaseDecl, SDGRec, BaseOffset)) + return true; + } + + // Check each field in this record. 
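+  // For each field, first recurse into nested record types, then scan the
+  // field's unusual annotations for an SV_DispatchGrid semantic. When found,
+  // record the byte offset, the component count (1 for scalars, the element
+  // count for vectors and arrays), and whether the components are 16- or
+  // 32-bit unsigned values.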
+ for (FieldDecl *Field : RD->fields()) { + uint64_t FieldNo = Field->getFieldIndex(); + CharUnits FieldOffset = Offset + CGM.getContext().toCharUnitsFromBits( + Layout.getFieldOffset(FieldNo)); + + // If this field is a record check its fields + if (const CXXRecordDecl *D = Field->getType()->getAsCXXRecordDecl()) { + if (FindDispatchGridSemantic(D, SDGRec, FieldOffset)) + return true; + } + // Otherwise check this field for the SV_DispatchGrid semantic annotation + for (const hlsl::UnusualAnnotation *UA : Field->getUnusualAnnotations()) { + if (UA->getKind() == hlsl::UnusualAnnotation::UA_SemanticDecl) { + const hlsl::SemanticDecl *SD = cast(UA); + if (SD->SemanticName.equals("SV_DispatchGrid")) { + const llvm::Type *FTy = CGM.getTypes().ConvertType(Field->getType()); + const llvm::Type *ElTy = FTy; + SDGRec.NumComponents = 1; + SDGRec.ByteOffset = (unsigned)FieldOffset.getQuantity(); + if (const llvm::VectorType *VT = dyn_cast(FTy)) { + SDGRec.NumComponents = VT->getNumElements(); + ElTy = VT->getElementType(); + } else if (const llvm::ArrayType *AT = + dyn_cast(FTy)) { + SDGRec.NumComponents = AT->getNumElements(); + ElTy = AT->getElementType(); + } + SDGRec.ComponentType = (ElTy->getIntegerBitWidth() == 16) + ? DXIL::ComponentType::U16 + : DXIL::ComponentType::U32; + return true; + } + } + } + } + return false; +} + void CGMSHLSLRuntime::AddHLSLNodeRecordTypeInfo( const clang::ParmVarDecl *parmDecl, hlsl::NodeIOProperties &node) { clang::QualType paramTy = parmDecl->getType().getCanonicalType(); @@ -2577,7 +2640,6 @@ void CGMSHLSLRuntime::AddHLSLNodeRecordTypeInfo( DiagnosticsEngine &Diags = CGM.getDiags(); auto &Rec = TemplateArgs.get(0); clang::QualType RecType = Rec.getAsType(); - llvm::Type *Type = CGM.getTypes().ConvertType(RecType); CXXRecordDecl *RD = RecType->getAsCXXRecordDecl(); // Get the TrackRWInputSharing flag from the record attribute @@ -2597,63 +2659,12 @@ void CGMSHLSLRuntime::AddHLSLNodeRecordTypeInfo( // Ex: For DispatchNodeInputRecord, set size = // size(MY_RECORD), alignment = alignof(MY_RECORD) + llvm::Type *Type = CGM.getTypes().ConvertType(RecType); node.RecordType.size = CGM.getDataLayout().getTypeAllocSize(Type); node.RecordType.alignment = CGM.getDataLayout().getABITypeAlignment(Type); - // Iterate over fields of the MY_RECORD(example) struct - for (auto fieldDecl : RD->fields()) { - // Check if any of the fields have a semantic annotation = - // SV_DispatchGrid - for (const hlsl::UnusualAnnotation *it : - fieldDecl->getUnusualAnnotations()) { - if (it->getKind() == hlsl::UnusualAnnotation::UA_SemanticDecl) { - const hlsl::SemanticDecl *sd = cast(it); - // if we find a field with SV_DispatchGrid, fill out the - // SV_DispatchGrid member with byteoffset of the field, - // NumComponents (3 for uint3 etc) and U32 vs U16 types, which are - // the only types allowed - if (sd->SemanticName.equals("SV_DispatchGrid")) { - clang::QualType FT = fieldDecl->getType(); - auto &DL = CGM.getDataLayout(); - auto &SDGRec = node.RecordType.SV_DispatchGrid; - - DXASSERT_NOMSG(SDGRec.NumComponents == 0); - - unsigned fieldIdx = fieldDecl->getFieldIndex(); - if (StructType *ST = dyn_cast(Type)) { - SDGRec.ByteOffset = - DL.getStructLayout(ST)->getElementOffset(fieldIdx); - } - const llvm::Type *lTy = CGM.getTypes().ConvertType(FT); - if (const llvm::VectorType *VT = - dyn_cast(lTy)) { - DXASSERT(VT->getElementType()->isIntegerTy(), "invalid type"); - SDGRec.NumComponents = VT->getNumElements(); - SDGRec.ComponentType = - (VT->getElementType()->getIntegerBitWidth() == 16) - ? 
DXIL::ComponentType::U16 - : DXIL::ComponentType::U32; - } else if (const llvm::ArrayType *AT = - dyn_cast(lTy)) { - DXASSERT(AT->getElementType()->isIntegerTy(), "invalid type"); - DXASSERT_NOMSG(AT->getNumElements() <= 3); - SDGRec.NumComponents = AT->getNumElements(); - SDGRec.ComponentType = - (AT->getElementType()->getIntegerBitWidth() == 16) - ? DXIL::ComponentType::U16 - : DXIL::ComponentType::U32; - } else { - // Scalar U16 or U32 - DXASSERT(lTy->isIntegerTy(), "invalid type"); - SDGRec.NumComponents = 1; - SDGRec.ComponentType = (lTy->getIntegerBitWidth() == 16) - ? DXIL::ComponentType::U16 - : DXIL::ComponentType::U32; - } - } - } - } - } + + FindDispatchGridSemantic(RD, node.RecordType.SV_DispatchGrid); } } } diff --git a/tools/clang/lib/CodeGen/CoverageMappingGen.cpp b/tools/clang/lib/CodeGen/CoverageMappingGen.cpp index eca91590e6..e16e015a74 100644 --- a/tools/clang/lib/CodeGen/CoverageMappingGen.cpp +++ b/tools/clang/lib/CodeGen/CoverageMappingGen.cpp @@ -116,7 +116,7 @@ class CoverageMappingBuilder { /// \brief Return the start location of an included file or expanded macro. SourceLocation getStartOfFileOrMacro(SourceLocation Loc) { if (Loc.isMacroID()) - return Loc.getLocWithOffset(-SM.getFileOffset(Loc)); + return Loc.getLocWithOffset(~SM.getFileOffset(Loc) + 1); return SM.getLocForStartOfFile(SM.getFileID(Loc)); } diff --git a/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp b/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp index 97fe28be7f..f39ec6d497 100644 --- a/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp +++ b/tools/clang/lib/CodeGen/ItaniumCXXABI.cpp @@ -639,8 +639,8 @@ llvm::Constant * ItaniumCXXABI::EmitNullMemberPointer(const MemberPointerType *MPT) { // Itanium C++ ABI 2.3: // A NULL pointer is represented as -1. - if (MPT->isMemberDataPointer()) - return llvm::ConstantInt::get(CGM.PtrDiffTy, -1ULL, /*isSigned=*/true); + if (MPT->isMemberDataPointer()) + return llvm::ConstantInt::get(CGM.PtrDiffTy, -1LL, /*isSigned=*/true); llvm::Constant *Zero = llvm::ConstantInt::get(CGM.PtrDiffTy, 0); llvm::Constant *Values[2] = { Zero, Zero }; @@ -1023,7 +1023,7 @@ static CharUnits computeOffsetHint(ASTContext &Context, // If Dst is not derived from Src we can skip the whole computation below and // return that Src is not a public base of Dst. Record all inheritance paths. if (!Dst->isDerivedFrom(Src, Paths)) - return CharUnits::fromQuantity(-2ULL); + return CharUnits::fromQuantity(-2LL); unsigned NumPublicPaths = 0; CharUnits Offset; @@ -1040,7 +1040,7 @@ static CharUnits computeOffsetHint(ASTContext &Context, // If the path contains a virtual base class we can't give any hint. // -1: no hint. if (J->Base->isVirtual()) - return CharUnits::fromQuantity(-1ULL); + return CharUnits::fromQuantity(-1LL); if (NumPublicPaths > 1) // Won't use offsets, skip computation. continue; @@ -1053,11 +1053,11 @@ static CharUnits computeOffsetHint(ASTContext &Context, // -2: Src is not a public base of Dst. if (NumPublicPaths == 0) - return CharUnits::fromQuantity(-2ULL); + return CharUnits::fromQuantity(-2LL); // -3: Src is a multiple public base type but never a virtual base type. if (NumPublicPaths > 1) - return CharUnits::fromQuantity(-3ULL); + return CharUnits::fromQuantity(-3LL); // Otherwise, the Src type is a unique public nonvirtual base type of Dst. // Return the offset of Src from the origin of Dst. @@ -1090,7 +1090,7 @@ llvm::Value *ItaniumCXXABI::EmitTypeid(CodeGenFunction &CGF, CGF.GetVTablePtr(ThisPtr, StdTypeInfoPtrTy->getPointerTo()); // Load the type info. 
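+  // Per the Itanium C++ ABI, the std::type_info pointer is stored one slot
+  // before the vtable's address point, hence the -1 index below.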
- Value = CGF.Builder.CreateConstInBoundsGEP1_64(Value, -1ULL); + Value = CGF.Builder.CreateConstInBoundsGEP1_64(Value, -1LL); return CGF.Builder.CreateLoad(Value); } @@ -1154,7 +1154,7 @@ llvm::Value *ItaniumCXXABI::EmitDynamicCastToVoid(CodeGenFunction &CGF, // Get the offset-to-top from the vtable. llvm::Value *OffsetToTop = - CGF.Builder.CreateConstInBoundsGEP1_64(VTable, -2ULL); + CGF.Builder.CreateConstInBoundsGEP1_64(VTable, -2LL); OffsetToTop = CGF.Builder.CreateLoad(OffsetToTop, "offset.to.top"); // Finally, add the offset to the pointer. diff --git a/tools/clang/lib/CodeGen/TargetInfo.cpp b/tools/clang/lib/CodeGen/TargetInfo.cpp index aba43964d9..aaf63355af 100644 --- a/tools/clang/lib/CodeGen/TargetInfo.cpp +++ b/tools/clang/lib/CodeGen/TargetInfo.cpp @@ -1283,7 +1283,7 @@ llvm::Value *X86_32ABIInfo::EmitVAArg(llvm::Value *VAListAddr, QualType Ty, Addr = CGF.Builder.CreateGEP(Addr, Offset); llvm::Value *AsInt = CGF.Builder.CreatePtrToInt(Addr, CGF.Int32Ty); - llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int32Ty, -Align); + llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int32Ty, ~Align + 1); Addr = CGF.Builder.CreateIntToPtr(CGF.Builder.CreateAnd(AsInt, Mask), Addr->getType(), "ap.cur.aligned"); @@ -2849,7 +2849,7 @@ static llvm::Value *EmitVAArgFromMemory(llvm::Value *VAListAddr, overflow_arg_area = CGF.Builder.CreateGEP(overflow_arg_area, Offset); llvm::Value *AsInt = CGF.Builder.CreatePtrToInt(overflow_arg_area, CGF.Int64Ty); - llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int64Ty, -(uint64_t)Align); + llvm::Value *Mask = llvm::ConstantInt::get(CGF.Int64Ty, ~Align + 1); overflow_arg_area = CGF.Builder.CreateIntToPtr(CGF.Builder.CreateAnd(AsInt, Mask), overflow_arg_area->getType(), diff --git a/tools/clang/lib/Format/Format.cpp b/tools/clang/lib/Format/Format.cpp index 7d556c9f0f..b6ca328972 100644 --- a/tools/clang/lib/Format/Format.cpp +++ b/tools/clang/lib/Format/Format.cpp @@ -1049,7 +1049,7 @@ class FormatTokenLexer { FormatTok = new (Allocator.Allocate()) FormatToken; readRawToken(*FormatTok); SourceLocation WhitespaceStart = - FormatTok->Tok.getLocation().getLocWithOffset(-TrailingWhitespace); + FormatTok->Tok.getLocation().getLocWithOffset(~TrailingWhitespace + 1); FormatTok->IsFirst = IsFirstToken; IsFirstToken = false; diff --git a/tools/clang/lib/Format/FormatToken.h b/tools/clang/lib/Format/FormatToken.h index f335eda086..249d526871 100644 --- a/tools/clang/lib/Format/FormatToken.h +++ b/tools/clang/lib/Format/FormatToken.h @@ -86,11 +86,11 @@ namespace format { TYPE(UnaryOperator) \ TYPE(Unknown) -enum TokenType { +enum TokenType : unsigned int { #define TYPE(X) TT_##X, -LIST_TOKEN_TYPES + LIST_TOKEN_TYPES #undef TYPE - NUM_TOKEN_TYPES + NUM_TOKEN_TYPES }; /// \brief Determines the name of a token type. diff --git a/tools/clang/lib/Headers/hlsl/dx/linalg.h b/tools/clang/lib/Headers/hlsl/dx/linalg.h new file mode 100644 index 0000000000..4f5e62070d --- /dev/null +++ b/tools/clang/lib/Headers/hlsl/dx/linalg.h @@ -0,0 +1,198 @@ +// Header for linear algebra APIs. 
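+//
+// Provides thin wrappers over the SM 6.9 cooperative-vector builtins in the
+// dx::linalg namespace: MatrixRef/VectorRef descriptor structs, Mul and
+// MulAdd matrix-vector helpers, and the OuterProductAccumulate and
+// VectorAccumulate accumulation helpers. The whole header is gated on
+// shader model 6.9+ and HLSL 2021+.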
+ +#if __spirv__ +#error "Cooperative vectors not (yet) supported for SPIRV" +#endif + +#if ((__SHADER_TARGET_MAJOR > 6) || \ + (__SHADER_TARGET_MAJOR == 6 && __SHADER_TARGET_MINOR >= 9)) && \ + (__HLSL_VERSION >= 2021) + +namespace dx { +namespace linalg { + +// NOTE: can't be an enum class because we get this error: +// error: non-type template argument of type 'dx::linalg::DataType' is not +// an integral constant expression +// +enum DataType { + DATA_TYPE_SINT16 = 2, // ComponentType::I16 + DATA_TYPE_UINT16 = 3, // ComponentType::U16 + DATA_TYPE_SINT32 = 4, // ComponentType::I32 + DATA_TYPE_UINT32 = 5, // ComponentType::U32 + DATA_TYPE_FLOAT16 = 8, // ComponentType::F16 + DATA_TYPE_FLOAT32 = 9, // ComponentType::F32 + DATA_TYPE_SINT8_T4_PACKED = 17, // ComponentType::PackedS8x32 + DATA_TYPE_UINT8_T4_PACKED = 18, // ComponentType::PackedU8x32 + DATA_TYPE_UINT8 = 19, // ComponentType::U8 + DATA_TYPE_SINT8 = 20, // ComponentType::I8 + DATA_TYPE_FLOAT8_E4M3 = 21, // ComponentType::F8_E4M3 + // (1 sign, 4 exp, 3 mantissa bits) + DATA_TYPE_FLOAT8_E5M2 = 22, // ComponentType::F8_E5M2 + // (1 sign, 5 exp, 2 mantissa bits) +}; + +enum MatrixLayout { + MATRIX_LAYOUT_ROW_MAJOR = 0, + MATRIX_LAYOUT_COLUMN_MAJOR = 1, + MATRIX_LAYOUT_MUL_OPTIMAL = 2, + MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL = 3 +}; + +// +// Helper for signedness +// +namespace details { + +template struct IsUnsigned {}; + +#define _SPECIALIZE_ISUNSIGNED(type, value) \ + template <> struct IsUnsigned { \ + static const bool Value = value; \ + } + +_SPECIALIZE_ISUNSIGNED(uint8_t4_packed, true); +_SPECIALIZE_ISUNSIGNED(int8_t4_packed, true); +_SPECIALIZE_ISUNSIGNED(uint32_t, true); +_SPECIALIZE_ISUNSIGNED(int32_t, false); +_SPECIALIZE_ISUNSIGNED(float32_t, false); + +#ifdef __HLSL_ENABLE_16_BIT +_SPECIALIZE_ISUNSIGNED(uint16_t, true); +_SPECIALIZE_ISUNSIGNED(int16_t, false); +_SPECIALIZE_ISUNSIGNED(float16_t, false); +#else // //__HLSL_ENABLE_16_BIT +_SPECIALIZE_ISUNSIGNED(half, false); +#endif //__HLSL_ENABLE_16_BIT + +#undef _SPECIALIZE_ISUNSIGNED + +} // namespace details + +// +// (RW)MatrixRef +// + +template +struct MatrixRefImpl { + BufferTy Buffer; + uint StartOffset; + uint Stride; +}; + +template +using MatrixRef = MatrixRefImpl; + +template +using RWMatrixRef = MatrixRefImpl; + +// +// (RW)VectorRef +// + +template struct VectorRefImpl { + BufferTy Buffer; + uint StartOffset; +}; + +template using VectorRef = VectorRefImpl; + +template +using RWVectorRef = VectorRefImpl; + +// +// Vector +// + +template struct InterpretedVector { + vector Data; +}; + +template +InterpretedVector MakeInterpretedVector(vector Vec) { + InterpretedVector IV = {Vec}; + return IV; +} + +// +// Mul +// + +template +vector +Mul(MatrixRefImpl + Matrix, + InterpretedVector InputVector) { + + vector OutputVector; + + __builtin_MatVecMul( + /*out*/ OutputVector, details::IsUnsigned::Value, + InputVector.Data, details::IsUnsigned::Value, InputDT, + Matrix.Buffer, Matrix.StartOffset, MatrixDT, MatrixM, MatrixK, + MatrixLayout, MatrixTranspose, Matrix.Stride); + + return OutputVector; +} + +// +// MulAdd +// + +template +vector +MulAdd(MatrixRefImpl + Matrix, + InterpretedVector InputVector, + VectorRefImpl BiasVector) { + + vector OutputVector; + + __builtin_MatVecMulAdd( + /*out*/ OutputVector, details::IsUnsigned::Value, + InputVector.Data, details::IsUnsigned::Value, InputDT, + Matrix.Buffer, Matrix.StartOffset, MatrixDT, MatrixM, MatrixK, + MatrixLayout, MatrixTranspose, Matrix.Stride, BiasVector.Buffer, + BiasVector.StartOffset, BiasVectorDT); + + 
return OutputVector; +} + +// +// OuterProductAccumulate +// + +template +void OuterProductAccumulate( + vector InputVector1, vector InputVector2, + RWMatrixRef Matrix) { + __builtin_OuterProductAccumulate(InputVector1, InputVector2, Matrix.Buffer, + Matrix.StartOffset, MatrixDT, MatrixLayout, + Matrix.Stride); +} + +// +// VectorAccumulate +// + +template +void VectorAccumulate(vector InputVector, + RWByteAddressBuffer Buffer, uint Offset) { + __builtin_VectorAccumulate(InputVector, Buffer, Offset); +} + +} // namespace linalg +} // namespace dx + +#endif // SM 6.9 check and HV version check diff --git a/tools/clang/lib/Lex/Lexer.cpp b/tools/clang/lib/Lex/Lexer.cpp index 089e76b78b..ce9dd8a3c0 100644 --- a/tools/clang/lib/Lex/Lexer.cpp +++ b/tools/clang/lib/Lex/Lexer.cpp @@ -480,7 +480,7 @@ static SourceLocation getBeginningOfFileToken(SourceLocation Loc, } // Create a lexer starting at the beginning of this token. - SourceLocation LexerStartLoc = Loc.getLocWithOffset(-LocInfo.second); + SourceLocation LexerStartLoc = Loc.getLocWithOffset(~LocInfo.second + 1); Lexer TheLexer(LexerStartLoc, LangOpts, BufStart, LexStart, Buffer.end()); TheLexer.SetCommentRetentionState(true); @@ -2737,7 +2737,7 @@ uint32_t Lexer::tryReadUCN(const char *&StartPtr, const char *SlashLoc, char C = getCharAndSize(CurPtr, CharSize); unsigned Value = llvm::hexDigitValue(C); - if (Value == -1U) { + if (Value == std::numeric_limits::max()) { if (Result && !isLexingRawMode()) { if (i == 0) { Diag(BufferPtr, diag::warn_ucn_escape_no_digits) diff --git a/tools/clang/lib/Lex/LiteralSupport.cpp b/tools/clang/lib/Lex/LiteralSupport.cpp index 606c821bb2..62f241812b 100644 --- a/tools/clang/lib/Lex/LiteralSupport.cpp +++ b/tools/clang/lib/Lex/LiteralSupport.cpp @@ -141,8 +141,12 @@ static unsigned ProcessCharEscape(const char *ThisTokBegin, // Hex escapes are a maximal series of hex digits. bool Overflow = false; for (; ThisTokBuf != ThisTokEnd; ++ThisTokBuf) { - int CharVal = llvm::hexDigitValue(ThisTokBuf[0]); - if (CharVal == -1) break; + // originally returned -1 for invalid hex digits, now returns ~0u + // signature: static inline unsigned int llvm::hexDigitValue(char C) + unsigned int CharVal = llvm::hexDigitValue(ThisTokBuf[0]); + if (CharVal == ~0U) + break; + // About to shift out a digit? if (ResultChar & 0xF0000000) Overflow = true; @@ -245,7 +249,7 @@ void clang::expandUCNs(SmallVectorImpl &Buf, StringRef Input) { uint32_t CodePoint = 0; for (++I; NumHexDigits != 0; ++I, --NumHexDigits) { unsigned Value = llvm::hexDigitValue(*I); - assert(Value != -1U); + assert(Value != ~0U); CodePoint <<= 4; CodePoint += Value; @@ -278,8 +282,9 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf, UcnLen = (ThisTokBuf[-1] == 'u' ? 4 : 8); unsigned short UcnLenSave = UcnLen; for (; ThisTokBuf != ThisTokEnd && UcnLenSave; ++ThisTokBuf, UcnLenSave--) { - int CharVal = llvm::hexDigitValue(ThisTokBuf[0]); - if (CharVal == -1) break; + unsigned int CharVal = llvm::hexDigitValue(ThisTokBuf[0]); + if (CharVal == ~0U) + break; UcnVal <<= 4; UcnVal |= CharVal; } diff --git a/tools/clang/lib/Lex/PPMacroExpansion.cpp b/tools/clang/lib/Lex/PPMacroExpansion.cpp index ebfb93df2e..16040d69c7 100644 --- a/tools/clang/lib/Lex/PPMacroExpansion.cpp +++ b/tools/clang/lib/Lex/PPMacroExpansion.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. 
-// //===----------------------------------------------------------------------===// // // This file implements the top level handling of macro expansion for the diff --git a/tools/clang/lib/Rewrite/Rewriter.cpp b/tools/clang/lib/Rewrite/Rewriter.cpp index be09a363a6..fa081d65ac 100644 --- a/tools/clang/lib/Rewrite/Rewriter.cpp +++ b/tools/clang/lib/Rewrite/Rewriter.cpp @@ -60,7 +60,7 @@ void RewriteBuffer::RemoveText(unsigned OrigOffset, unsigned Size, Buffer.erase(RealOffset, Size); // Add a delta so that future changes are offset correctly. - AddReplaceDelta(OrigOffset, -Size); + AddReplaceDelta(OrigOffset, ~Size + 1); if (removeLineIfEmpty) { // Find the line that the remove occurred and if it is completely empty @@ -86,7 +86,7 @@ void RewriteBuffer::RemoveText(unsigned OrigOffset, unsigned Size, } if (posI != end() && *posI == '\n') { Buffer.erase(curLineStartOffs, lineSize + 1/* + '\n'*/); - AddReplaceDelta(curLineStartOffs, -(lineSize + 1/* + '\n'*/)); + AddReplaceDelta(curLineStartOffs, ~(lineSize + 1 /* + '\n'*/) + 1); } } } diff --git a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp index db140f4766..9bb2f1b1fa 100644 --- a/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp +++ b/tools/clang/lib/SPIRV/AlignmentSizeCalculator.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #include "AlignmentSizeCalculator.h" diff --git a/tools/clang/lib/SPIRV/AstTypeProbe.cpp b/tools/clang/lib/SPIRV/AstTypeProbe.cpp index 31a9bd8f7d..b6ca1f60ae 100644 --- a/tools/clang/lib/SPIRV/AstTypeProbe.cpp +++ b/tools/clang/lib/SPIRV/AstTypeProbe.cpp @@ -1353,6 +1353,27 @@ bool isOrContainsNonFpColMajorMatrix(const ASTContext &astContext, return false; } +bool isOrContainsBoolType(QualType type) { + if (isBoolOrVecMatOfBoolType(type)) { + return true; + } + + if (const auto *arrayType = type->getAsArrayTypeUnsafe()) { + return isOrContainsBoolType(arrayType->getElementType()); + } + + if (const auto *recordType = type->getAs()) { + for (auto field : recordType->getDecl()->fields()) { + if (isOrContainsBoolType(field->getType())) { + return true; + } + } + return false; + } + + return false; +} + bool isTypeInVkNamespace(const RecordType *type) { if (const auto *nameSpaceDecl = dyn_cast(type->getDecl()->getDeclContext())) { diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp index 24dfdc2e9a..c8444a3b81 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.cpp +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #include "CapabilityVisitor.h" @@ -125,6 +122,12 @@ void CapabilityVisitor::addCapabilityForType(const SpirvType *type, } addCapabilityForType(raType->getElementType(), loc, sc); } + // Node payload array also requires additional capability. 
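+  // As with runtime arrays, the element type is visited recursively; node
+  // payload arrays additionally require the AMD_shader_enqueue extension and
+  // the ShaderEnqueueAMDX capability.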
+ else if (const auto *npaType = dyn_cast(type)) { + addExtension(Extension::AMD_shader_enqueue, "Vulkan 1.3", loc); + addCapability(spv::Capability::ShaderEnqueueAMDX, loc); + addCapabilityForType(npaType->getElementType(), loc, sc); + } // Image types else if (const auto *imageType = dyn_cast(type)) { switch (imageType->getDimension()) { @@ -257,6 +260,19 @@ bool CapabilityVisitor::visit(SpirvDecoration *decor) { addCapability(spv::Capability::FragmentBarycentricKHR); break; } + case spv::Decoration::NodeSharesPayloadLimitsWithAMDX: + case spv::Decoration::NodeMaxPayloadsAMDX: + case spv::Decoration::TrackFinishWritingAMDX: + case spv::Decoration::PayloadNodeNameAMDX: + case spv::Decoration::PayloadNodeBaseIndexAMDX: + case spv::Decoration::PayloadNodeSparseArrayAMDX: + case spv::Decoration::PayloadNodeArraySizeAMDX: + case spv::Decoration::PayloadDispatchIndirectAMDX: { + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_3, "WorkGraphs", loc); + addCapability(spv::Capability::ShaderEnqueueAMDX, loc); + addExtension(Extension::AMD_shader_enqueue, "Vulkan 1.3", loc); + break; + } // Capabilities needed for built-ins case spv::Decoration::BuiltIn: { AddVulkanMemoryModelForVolatile(decor, loc); @@ -535,8 +551,14 @@ bool CapabilityVisitor::visitInstruction(SpirvInstruction *instr) { addCapability(spv::Capability::GroupNonUniformQuad); break; case spv::Op::OpVariable: { - if (spvOptions.enableReflect && - !cast(instr)->getHlslUserType().empty()) { + auto var = cast(instr); + auto storage = var->getStorageClass(); + if (storage == spv::StorageClass::NodePayloadAMDX) { + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_3, "WorkGraphs", loc); + addCapability(spv::Capability::ShaderEnqueueAMDX, loc); + addExtension(Extension::AMD_shader_enqueue, "Vulkan 1.3", loc); + } + if (spvOptions.enableReflect && !var->getHlslUserType().empty()) { addExtension(Extension::GOOGLE_user_type, "HLSL User Type", loc); addExtension(Extension::GOOGLE_hlsl_functionality1, "HLSL User Type", loc); @@ -580,6 +602,28 @@ bool CapabilityVisitor::visitInstruction(SpirvInstruction *instr) { } break; } + case spv::Op::OpConstantStringAMDX: + case spv::Op::OpSpecConstantStringAMDX: + case spv::Op::OpAllocateNodePayloadsAMDX: + case spv::Op::OpEnqueueNodePayloadsAMDX: + case spv::Op::OpIsNodePayloadValidAMDX: + case spv::Op::OpFinishWritingNodePayloadAMDX: { + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_3, "WorkGraphs", loc); + addCapability(spv::Capability::ShaderEnqueueAMDX, loc); + addExtension(Extension::AMD_shader_enqueue, "Vulkan 1.3", loc); + break; + } + case spv::Op::OpControlBarrier: + case spv::Op::OpMemoryBarrier: { + auto barrier = cast(instr); + if ((bool)(barrier->getMemorySemantics() & + spv::MemorySemanticsMask::OutputMemoryKHR)) { + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_3, "NODE_OUTPUT_MEMORY", + loc); + addCapability(spv::Capability::VulkanMemoryModel, loc); + } + break; + } default: break; @@ -639,12 +683,25 @@ bool CapabilityVisitor::visit(SpirvEntryPoint *entryPoint) { return true; } -bool CapabilityVisitor::visit(SpirvExecutionMode *execMode) { +bool CapabilityVisitor::visit(SpirvExecutionModeBase *execMode) { spv::ExecutionMode executionMode = execMode->getExecutionMode(); SourceLocation execModeSourceLocation = execMode->getSourceLocation(); SourceLocation entryPointSourceLocation = execMode->getEntryPoint()->getSourceLocation(); switch (executionMode) { + case spv::ExecutionMode::CoalescingAMDX: + case spv::ExecutionMode::MaxNodeRecursionAMDX: + case 
spv::ExecutionMode::StaticNumWorkgroupsAMDX: + case spv::ExecutionMode::MaxNumWorkgroupsAMDX: + featureManager.requestTargetEnv(SPV_ENV_VULKAN_1_3, "WorkGraphs", + execModeSourceLocation); + addCapability(spv::Capability::ShaderEnqueueAMDX, execModeSourceLocation); + addExtension(Extension::AMD_shader_enqueue, "Vulkan 1.3", + execModeSourceLocation); + break; + case spv::ExecutionMode::SubgroupSize: + addCapability(spv::Capability::SubgroupDispatch, execModeSourceLocation); + break; case spv::ExecutionMode::PostDepthCoverage: addCapability(spv::Capability::SampleMaskPostDepthCoverage, entryPointSourceLocation); diff --git a/tools/clang/lib/SPIRV/CapabilityVisitor.h b/tools/clang/lib/SPIRV/CapabilityVisitor.h index 95db110cce..35d4b5a18b 100644 --- a/tools/clang/lib/SPIRV/CapabilityVisitor.h +++ b/tools/clang/lib/SPIRV/CapabilityVisitor.h @@ -31,7 +31,7 @@ class CapabilityVisitor : public Visitor { bool visit(SpirvDecoration *decor) override; bool visit(SpirvEntryPoint *) override; - bool visit(SpirvExecutionMode *) override; + bool visit(SpirvExecutionModeBase *execMode) override; bool visit(SpirvImageQuery *) override; bool visit(SpirvImageOp *) override; bool visit(SpirvImageSparseTexelsResident *) override; diff --git a/tools/clang/lib/SPIRV/DebugTypeVisitor.cpp b/tools/clang/lib/SPIRV/DebugTypeVisitor.cpp index 058e7b6255..24fab092cc 100644 --- a/tools/clang/lib/SPIRV/DebugTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/DebugTypeVisitor.cpp @@ -356,6 +356,17 @@ SpirvDebugType *DebugTypeVisitor::lowerToDebugType(const SpirvType *spirvType) { debugType = spvContext.getDebugTypeArray(spirvType, elemDebugType, counts); break; } + case SpirvType::TK_NodePayloadArrayAMD: { + auto *arrType = dyn_cast(spirvType); + SpirvDebugInstruction *elemDebugType = + lowerToDebugType(arrType->getElementType()); + + llvm::SmallVector counts; + counts.push_back(0u); + + debugType = spvContext.getDebugTypeArray(spirvType, elemDebugType, counts); + break; + } case SpirvType::TK_Vector: { auto *vecType = dyn_cast(spirvType); SpirvDebugInstruction *elemDebugType = diff --git a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp index de73d5e417..9d0d8f51a3 100644 --- a/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp +++ b/tools/clang/lib/SPIRV/DeclResultIdMapper.cpp @@ -467,6 +467,10 @@ hlsl::DxilParamInputQual deduceParamQual(const DeclaratorDecl *decl, if (decl->hasAttr()) return hlsl::DxilParamInputQual::InPayload; + if (hlsl::IsHLSLNodeType(type)) { + return hlsl::DxilParamInputQual::NodeIO; + } + return asInput ? 
hlsl::DxilParamInputQual::In : hlsl::DxilParamInputQual::Out; } @@ -475,6 +479,9 @@ hlsl::DxilParamInputQual deduceParamQual(const DeclaratorDecl *decl, const hlsl::SigPoint *deduceSigPoint(const DeclaratorDecl *decl, bool asInput, const hlsl::ShaderModel::Kind kind, bool forPCF) { + if (kind == hlsl::ShaderModel::Kind::Node) { + return hlsl::SigPoint::GetSigPoint(hlsl::SigPoint::Kind::CSIn); + } return hlsl::SigPoint::GetSigPoint(hlsl::SigPointFromInputQual( deduceParamQual(decl, asInput), kind, forPCF)); } @@ -2158,6 +2165,8 @@ bool DeclResultIdMapper::assignLocations( llvm::DenseSet *stageVariableLocationInfo) { for (const auto *var : vars) { + if (hlsl::IsHLSLNodeType(var->getAstType())) + continue; auto locCount = var->getLocationCount(); uint32_t location = nextLocs(locCount); spvBuilder.decorateLocation(var->getSpirvInstr(), location); @@ -3489,7 +3498,9 @@ SpirvVariable *DeclResultIdMapper::createSpirvInterfaceVariable( StageVar stageVar( stageVarData.sigPoint, *stageVarData.semantic, builtinAttr, evalType, // For HS/DS/GS, we have already stripped the outmost arrayness on type. - getLocationAndComponentCount(astContext, stageVarData.type)); + hlsl::IsHLSLNodeInputType(stageVarData.type) + ? LocationAndComponent({0, 0, false}) + : getLocationAndComponentCount(astContext, stageVarData.type)); const auto name = stageVarData.namePrefix.str() + "." + stageVar.getSemanticStr(); SpirvVariable *varInstr = createSpirvStageVar( @@ -3708,6 +3719,22 @@ bool DeclResultIdMapper::createStageVars(StageVarDataBundle &stageVarData, stageVarData.semantic = &thisSemantic; } + if (hlsl::IsHLSLNodeType(stageVarData.type)) { + // Hijack the notion of semantic to use createSpirvInterfaceVariable + StringRef str = stageVarData.decl->getName(); + stageVarData.semantic->str = stageVarData.semantic->name = str; + stageVarData.semantic->semantic = hlsl::Semantic::GetArbitrary(); + SpirvVariable *varInstr = createSpirvInterfaceVariable(stageVarData); + if (!varInstr) { + return false; + } + + *value = hlsl::IsHLSLNodeInputType(stageVarData.type) + ? varInstr + : loadShaderInputVariable(varInstr, stageVarData); + return true; + } + if (stageVarData.semantic->isValid() && // Structs with attached semantics will be handled later. !stageVarData.type->isStructureType()) { @@ -4161,6 +4188,8 @@ SpirvVariable *DeclResultIdMapper::getBuiltinVar(spv::BuiltIn builtIn, case spv::BuiltIn::GlobalInvocationId: case spv::BuiltIn::WorkgroupId: case spv::BuiltIn::LocalInvocationIndex: + case spv::BuiltIn::RemainingRecursionLevelsAMDX: + case spv::BuiltIn::ShaderIndexAMDX: sc = spv::StorageClass::Input; break; case spv::BuiltIn::TaskCountNV: @@ -4196,7 +4225,9 @@ SpirvVariable *DeclResultIdMapper::createSpirvStageVar( const auto type = stageVar->getAstType(); const auto isPrecise = decl->hasAttr(); auto isNointerp = decl->hasAttr(); - spv::StorageClass sc = getStorageClassForSigPoint(sigPoint); + spv::StorageClass sc = hlsl::IsHLSLNodeInputType(stageVar->getAstType()) + ? spv::StorageClass::NodePayloadAMDX + : getStorageClassForSigPoint(sigPoint); if (sc == spv::StorageClass::Max) return 0; stageVar->setStorageClass(sc); diff --git a/tools/clang/lib/SPIRV/EmitVisitor.cpp b/tools/clang/lib/SPIRV/EmitVisitor.cpp index eb00f59632..eb94ce0797 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.cpp +++ b/tools/clang/lib/SPIRV/EmitVisitor.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. 
-// All rights reserved. -// //===----------------------------------------------------------------------===// // Do not change the inclusion order between "dxc/Support/*" files. @@ -617,19 +614,20 @@ bool EmitVisitor::visit(SpirvEntryPoint *inst) { return true; } -bool EmitVisitor::visit(SpirvExecutionMode *inst) { +bool EmitVisitor::visit(SpirvExecutionModeBase *inst) { initInstruction(inst); curInst.push_back(getOrAssignResultId(inst->getEntryPoint())); curInst.push_back(static_cast(inst->getExecutionMode())); - if (inst->getopcode() == spv::Op::OpExecutionMode) { - curInst.insert(curInst.end(), inst->getParams().begin(), - inst->getParams().end()); - } else { - for (uint32_t param : inst->getParams()) { - curInst.push_back(typeHandler.getOrCreateConstantInt( - llvm::APInt(32, param), context.getUIntType(32), - /*isSpecConst */ false)); + if (auto *exeModeId = dyn_cast(inst)) { + for (SpirvInstruction *param : exeModeId->getParams()) { + if (auto *ConstantInst = dyn_cast(param)) + typeHandler.getOrCreateConstant(ConstantInst); + curInst.push_back(getOrAssignResultId(param)); } + } else { + auto *exeMode = llvm::cast(inst); + ArrayRef params = exeMode->getParams(); + curInst.insert(curInst.end(), params.begin(), params.end()); } finalizeInstruction(&preambleBinary); return true; @@ -940,6 +938,73 @@ bool EmitVisitor::visit(SpirvBarrier *inst) { curInst.push_back(memoryScopeId); curInst.push_back(memorySemanticsId); finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvIsNodePayloadValid *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back( + getOrAssignResultId(inst->getPayloadArray())); + curInst.push_back( + getOrAssignResultId(inst->getNodeIndex())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvNodePayloadArrayLength *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back( + getOrAssignResultId(inst->getPayloadArray())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvAllocateNodePayloads *inst) { + const uint32_t allocationScopeId = typeHandler.getOrCreateConstantInt( + llvm::APInt(32, static_cast(inst->getAllocationScope())), + context.getUIntType(32), /*isSpecConst */ false); + + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(allocationScopeId); + curInst.push_back( + getOrAssignResultId(inst->getRecordCount())); + curInst.push_back( + getOrAssignResultId(inst->getShaderIndex())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvEnqueueNodePayloads *inst) { + initInstruction(inst); + curInst.push_back(getOrAssignResultId(inst->getPayload())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + +bool EmitVisitor::visit(SpirvFinishWritingNodePayload *inst) { + initInstruction(inst); + curInst.push_back(inst->getResultTypeId()); + 
curInst.push_back(getOrAssignResultId(inst)); + curInst.push_back(getOrAssignResultId(inst->getPayload())); + finalizeInstruction(&mainBinary); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); return true; } @@ -1013,6 +1078,13 @@ bool EmitVisitor::visit(SpirvConstantComposite *inst) { return true; } +bool EmitVisitor::visit(SpirvConstantString *inst) { + typeHandler.getOrCreateConstant(inst); + emitDebugNameForInstruction(getOrAssignResultId(inst), + inst->getDebugName()); + return true; +} + bool EmitVisitor::visit(SpirvConstantNull *inst) { typeHandler.getOrCreateConstant(inst); emitDebugNameForInstruction(getOrAssignResultId(inst), @@ -1563,12 +1635,6 @@ bool EmitVisitor::visit(SpirvDebugLexicalBlock *inst) { } bool EmitVisitor::visit(SpirvDebugScope *inst) { - // Technically entry function wrappers do not exist in HLSL. They - // are just created by DXC. We do not want to emit DebugScope for - // it. - if (inEntryFunctionWrapper) - return true; - initInstruction(inst); curInst.push_back(inst->getResultTypeId()); curInst.push_back(getOrAssignResultId(inst)); @@ -1999,7 +2065,13 @@ bool EmitVisitor::visit(SpirvIntrinsicInstruction *inst) { } } - finalizeInstruction(&mainBinary); + auto opcode = static_cast(inst->getInstruction()); + if ((opcode == spv::Op::OpSpecConstant || opcode == spv::Op::OpConstant) && + !inst->getInstructionSet()) { + finalizeInstruction(&typeConstantBinary); + } else { + finalizeInstruction(&mainBinary); + } return true; } @@ -2074,6 +2146,8 @@ uint32_t EmitTypeHandler::getOrCreateConstant(SpirvConstant *inst) { return getOrCreateConstantNull(constNull); } else if (auto *constBool = dyn_cast(inst)) { return getOrCreateConstantBool(constBool); + } else if (auto *constString = dyn_cast(inst)) { + return getOrCreateConstantString(constString); } else if (auto *constUndef = dyn_cast(inst)) { return getOrCreateUndef(constUndef); } @@ -2112,6 +2186,36 @@ uint32_t EmitTypeHandler::getOrCreateConstantBool(SpirvConstantBoolean *inst) { return inst->getResultId(); } +uint32_t EmitTypeHandler::getOrCreateConstantString(SpirvConstantString *inst) { + const StringRef str = inst->getString(); + const bool isSpecConst = inst->isSpecConstant(); + + if (!isSpecConst && + emittedConstantStrings.find(str) != emittedConstantStrings.end()) { + // Already emitted this constant value. Reuse. + inst->setResultId(emittedConstantStrings[str]->getResultId()); + } else if (isSpecConst && emittedSpecConstantInstructions.find(inst) != + emittedSpecConstantInstructions.end()) { + // We've already emitted this SpecConstant. Reuse. + return inst->getResultId(); + } else { + // Constant wasn't emitted in the past. 
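+    // Encode the string payload as SPIR-V words and emit the instruction
+    // into the types-and-constants section of the module.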
+ const auto &words = string::encodeSPIRVString(inst->getString()); + initTypeInstruction(inst->getopcode()); + curTypeInst.push_back(getOrAssignResultId(inst)); + curTypeInst.insert(curTypeInst.end(), words.begin(), words.end()); + finalizeTypeInstruction(); + // Remember this constant for the future (if not a spec constant) + if (isSpecConst) { + emittedSpecConstantInstructions.insert(inst); + } else { + emittedConstantStrings[str] = inst; + } + } + + return inst->getResultId(); +} + uint32_t EmitTypeHandler::getOrCreateConstantNull(SpirvConstantNull *inst) { auto found = std::find_if(emittedConstantNulls.begin(), emittedConstantNulls.end(), @@ -2532,6 +2636,84 @@ uint32_t EmitTypeHandler::emitType(const SpirvType *type) { if (stride.hasValue()) emitDecoration(id, spv::Decoration::ArrayStride, {stride.getValue()}); } + // NodePayloadArray types + else if (const auto *npaType = dyn_cast(type)) { + const uint32_t elemTypeId = emitType(npaType->getElementType()); + initTypeInstruction(spv::Op::OpTypeNodePayloadArrayAMDX); + curTypeInst.push_back(id); + curTypeInst.push_back(elemTypeId); + finalizeTypeInstruction(); + + // Emit decorations + const ParmVarDecl *nodeDecl = npaType->getNodeDecl(); + if (hlsl::IsHLSLNodeOutputType(nodeDecl->getType())) { + StringRef name = nodeDecl->getName(); + unsigned index = 0; + if (auto nodeID = nodeDecl->getAttr()) { + name = nodeID->getName(); + index = nodeID->getArrayIndex(); + } + + auto *str = new (context) SpirvConstantString(name); + uint32_t nodeName = getOrCreateConstantString(str); + emitDecoration(id, spv::Decoration::PayloadNodeNameAMDX, {nodeName}, + llvm::None, true); + if (index) { + uint32_t baseIndex = getOrCreateConstantInt( + llvm::APInt(32, index), context.getUIntType(32), false); + emitDecoration(id, spv::Decoration::PayloadNodeBaseIndexAMDX, + {baseIndex}, llvm::None, true); + } + } + + uint32_t maxRecords; + if (const auto *attr = nodeDecl->getAttr()) { + maxRecords = getOrCreateConstantInt(llvm::APInt(32, attr->getMaxCount()), + context.getUIntType(32), false); + } else { + maxRecords = getOrCreateConstantInt(llvm::APInt(32, 1), + context.getUIntType(32), false); + } + emitDecoration(id, spv::Decoration::NodeMaxPayloadsAMDX, {maxRecords}, + llvm::None, true); + + if (const auto *attr = nodeDecl->getAttr()) { + const DeclContext *dc = nodeDecl->getParentFunctionOrMethod(); + if (const auto *funDecl = dyn_cast_or_null(dc)) { + IdentifierInfo *ii = attr->getName(); + bool alreadyExists = false; + for (auto *paramDecl : funDecl->params()) { + if (paramDecl->getIdentifier() == ii) { + assert(paramDecl != nodeDecl); + auto otherType = context.getNodeDeclPayloadType(paramDecl); + const uint32_t otherId = + getResultIdForType(otherType, &alreadyExists); + assert(alreadyExists && "forward references not allowed in " + "MaxRecordsSharedWith attribute"); + emitDecoration(id, spv::Decoration::NodeSharesPayloadLimitsWithAMDX, + {otherId}, llvm::None, true); + break; + } + } + assert(alreadyExists && + "invalid reference in MaxRecordsSharedWith attribute"); + } + } + if (const auto *attr = nodeDecl->getAttr()) { + emitDecoration(id, spv::Decoration::PayloadNodeSparseArrayAMDX, {}, + llvm::None); + } + if (const auto *attr = nodeDecl->getAttr()) { + emitDecoration(id, spv::Decoration::PayloadNodeSparseArrayAMDX, {}, + llvm::None); + } + if (const auto *attr = nodeDecl->getAttr()) { + uint32_t arraySize = getOrCreateConstantInt( + llvm::APInt(32, attr->getCount()), context.getUIntType(32), false); + emitDecoration(id, 
spv::Decoration::PayloadNodeArraySizeAMDX, {arraySize}, + llvm::None, true); + } + } // Structure types else if (const auto *structType = dyn_cast(type)) { std::vector> @@ -2545,6 +2727,15 @@ uint32_t EmitTypeHandler::emitType(const SpirvType *type) { } } + if (const auto recordDecl = dyn_cast_or_null( + context.getStructDeclForSpirvType(structType))) { + auto index = context.getDispatchGridIndex(recordDecl); + if (index.hasValue()) { + emitDecoration(id, spv::Decoration::PayloadDispatchIndirectAMDX, {}, + index); + } + } + // Emit OpMemberName for the struct members. for (size_t i = 0; i < fieldsToGenerate.size(); ++i) emitNameForType(fieldsToGenerate[i].get().name, id, i); @@ -2607,6 +2798,13 @@ uint32_t EmitTypeHandler::emitType(const SpirvType *type) { else if (interfaceType == StructInterfaceType::UniformBuffer) emitDecoration(id, spv::Decoration::Block, {}); + // Emit NodeTrackRWInputSharing decoration if attribute is present. + const auto *structDecl = dyn_cast_or_null( + context.getStructDeclForSpirvType(structType)); + if (structDecl && structDecl->hasAttr()) { + emitDecoration(id, spv::Decoration::TrackFinishWritingAMDX, {}); + } + initTypeInstruction(spv::Op::OpTypeStruct); curTypeInst.push_back(id); for (auto fieldTypeId : fieldTypeIds) @@ -2749,14 +2947,17 @@ void EmitTypeHandler::emitLiteral(const SpirvConstant *literal, void EmitTypeHandler::emitDecoration(uint32_t typeResultId, spv::Decoration decoration, llvm::ArrayRef decorationParams, - llvm::Optional memberIndex) { - + llvm::Optional memberIndex, + bool usesIdParams) { spv::Op op = memberIndex.hasValue() ? spv::Op::OpMemberDecorate : spv::Op::OpDecorate; if (decoration == spv::Decoration::UserTypeGOOGLE) { op = memberIndex.hasValue() ? spv::Op::OpMemberDecorateString : spv::Op::OpDecorateString; } + if (usesIdParams) { + op = spv::Op::OpDecorateId; + } assert(curDecorationInst.empty()); curDecorationInst.push_back(static_cast(op)); diff --git a/tools/clang/lib/SPIRV/EmitVisitor.h b/tools/clang/lib/SPIRV/EmitVisitor.h index 1f9b0939e6..fb4b22e52b 100644 --- a/tools/clang/lib/SPIRV/EmitVisitor.h +++ b/tools/clang/lib/SPIRV/EmitVisitor.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_SPIRV_EMITVISITOR_H #define LLVM_CLANG_SPIRV_EMITVISITOR_H @@ -86,7 +83,8 @@ class EmitTypeHandler { // parameters. 
void emitDecoration(uint32_t typeResultId, spv::Decoration, llvm::ArrayRef decorationParams, - llvm::Optional memberIndex = llvm::None); + llvm::Optional memberIndex = llvm::None, + bool usesIdParams = false); uint32_t getOrCreateConstant(SpirvConstant *); @@ -113,6 +111,7 @@ class EmitTypeHandler { uint32_t getOrCreateConstantNull(SpirvConstantNull *); uint32_t getOrCreateUndef(SpirvUndef *); uint32_t getOrCreateConstantBool(SpirvConstantBoolean *); + uint32_t getOrCreateConstantString(SpirvConstantString *); template void emitLiteral(const SpirvConstant *, vecType &outInst); template @@ -176,6 +175,7 @@ class EmitTypeHandler { emittedConstantInts; llvm::DenseMap, uint32_t> emittedConstantFloats; + llvm::DenseMap emittedConstantStrings; llvm::SmallVector emittedConstantComposites; llvm::SmallVector emittedConstantNulls; llvm::SmallVector emittedUndef; @@ -233,7 +233,7 @@ class EmitVisitor : public Visitor { bool visit(SpirvEmitVertex *) override; bool visit(SpirvEndPrimitive *) override; bool visit(SpirvEntryPoint *) override; - bool visit(SpirvExecutionMode *) override; + bool visit(SpirvExecutionModeBase *) override; bool visit(SpirvString *) override; bool visit(SpirvSource *) override; bool visit(SpirvModuleProcessed *) override; @@ -251,6 +251,11 @@ class EmitVisitor : public Visitor { bool visit(SpirvAccessChain *) override; bool visit(SpirvAtomic *) override; bool visit(SpirvBarrier *) override; + bool visit(SpirvIsNodePayloadValid *inst) override; + bool visit(SpirvNodePayloadArrayLength *inst) override; + bool visit(SpirvAllocateNodePayloads *inst) override; + bool visit(SpirvEnqueueNodePayloads *inst) override; + bool visit(SpirvFinishWritingNodePayload *inst) override; bool visit(SpirvBinaryOp *) override; bool visit(SpirvBitFieldExtract *) override; bool visit(SpirvBitFieldInsert *) override; @@ -258,6 +263,7 @@ class EmitVisitor : public Visitor { bool visit(SpirvConstantInteger *) override; bool visit(SpirvConstantFloat *) override; bool visit(SpirvConstantComposite *) override; + bool visit(SpirvConstantString *) override; bool visit(SpirvConstantNull *) override; bool visit(SpirvConvertPtrToU *) override; bool visit(SpirvConvertUToPtr *) override; @@ -458,6 +464,10 @@ class EmitVisitor : public Visitor { std::vector mainBinary; // String literals to SpirvString objects llvm::StringMap stringIdMap; + // String literals to SpirvConstantString objects + llvm::StringMap stringConstantIdMap; + // String spec constants + llvm::DenseSet stringSpecConstantInstructions; // Main file information for debugging that will be used by OpLine. uint32_t debugMainFileId; // Id for Vulkan DebugInfo extended instruction set. 
Used when generating diff --git a/tools/clang/lib/SPIRV/FeatureManager.cpp b/tools/clang/lib/SPIRV/FeatureManager.cpp index 7fb449fee9..b6aed4d8b6 100644 --- a/tools/clang/lib/SPIRV/FeatureManager.cpp +++ b/tools/clang/lib/SPIRV/FeatureManager.cpp @@ -214,6 +214,7 @@ Extension FeatureManager::getExtensionSymbol(llvm::StringRef name) { .Case("SPV_EXT_shader_image_int64", Extension::EXT_shader_image_int64) .Case("SPV_KHR_physical_storage_buffer", Extension::KHR_physical_storage_buffer) + .Case("SPV_AMDX_shader_enqueue", Extension::AMD_shader_enqueue) .Case("SPV_KHR_vulkan_memory_model", Extension::KHR_vulkan_memory_model) .Case("SPV_KHR_compute_shader_derivatives", Extension::KHR_compute_shader_derivatives) @@ -284,6 +285,8 @@ const char *FeatureManager::getExtensionName(Extension symbol) { return "SPV_EXT_shader_image_int64"; case Extension::KHR_physical_storage_buffer: return "SPV_KHR_physical_storage_buffer"; + case Extension::AMD_shader_enqueue: + return "SPV_AMDX_shader_enqueue"; case Extension::KHR_vulkan_memory_model: return "SPV_KHR_vulkan_memory_model"; case Extension::KHR_compute_shader_derivatives: diff --git a/tools/clang/lib/SPIRV/GlPerVertex.cpp b/tools/clang/lib/SPIRV/GlPerVertex.cpp index 09b09236b4..aa5a40d008 100644 --- a/tools/clang/lib/SPIRV/GlPerVertex.cpp +++ b/tools/clang/lib/SPIRV/GlPerVertex.cpp @@ -324,6 +324,9 @@ bool GlPerVertex::setClipCullDistanceType(SemanticIndexToTypeMap *typeMap, bool GlPerVertex::doGlPerVertexFacts(const NamedDecl *decl, QualType baseType, bool asInput) { + if (hlsl::IsHLSLNodeType(baseType)) { + return true; + } llvm::StringRef semanticStr; const hlsl::Semantic *semantic = {}; diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp index b31d19b5d8..45d04e8160 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #include "LowerTypeVisitor.h" @@ -40,33 +37,6 @@ inline uint32_t roundToPow2(uint32_t val, uint32_t pow2) { } // end anonymous namespace -// This method sorts a field list in the following order: -// - fields with register annotation first, sorted by register index. -// - then fields without annotation, in order of declaration. 
-static std::vector -sortFields(llvm::ArrayRef fields) { - std::vector output; - output.resize(fields.size()); - - auto back_inserter = output.rbegin(); - std::map fixed_fields; - for (auto it = fields.rbegin(); it < fields.rend(); it++) { - if (it->registerC) { - fixed_fields.insert({it->registerC->RegisterNumber, &*it}); - } else { - *back_inserter = &*it; - back_inserter++; - } - } - - auto front_inserter = output.begin(); - for (const auto &item : fixed_fields) { - *front_inserter = item.second; - front_inserter++; - } - return output; -} - static void setDefaultFieldSize(const AlignmentSizeCalculator &alignmentCalc, const SpirvLayoutRule rule, const HybridStructType::FieldInfo *currentField, @@ -295,6 +265,37 @@ bool LowerTypeVisitor::visitInstruction(SpirvInstruction *instr) { return true; } +std::vector LowerTypeVisitor::sortFields( + llvm::ArrayRef fields) { + std::vector output; + output.resize(fields.size()); + + auto back_inserter = output.rbegin(); + std::map fixed_fields; + for (auto it = fields.rbegin(); it < fields.rend(); it++) { + if (it->registerC) { + auto insertionResult = + fixed_fields.insert({it->registerC->RegisterNumber, &*it}); + if (!insertionResult.second) { + emitError( + "field \"%0\" at register(c%1) overlaps with previous members", + it->registerC->Loc) + << it->name << it->registerC->RegisterNumber; + } + } else { + *back_inserter = &*it; + back_inserter++; + } + } + + auto front_inserter = output.begin(); + for (const auto &item : fixed_fields) { + *front_inserter = item.second; + front_inserter++; + } + return output; +} + const SpirvType *LowerTypeVisitor::lowerType(const SpirvType *type, SpirvLayoutRule rule, SourceLocation loc) { @@ -365,6 +366,16 @@ const SpirvType *LowerTypeVisitor::lowerType(const SpirvType *type, return raType; return spvContext.getRuntimeArrayType(loweredElemType, raType->getStride()); } + // Node payload arrays could contain a hybrid type + else if (const auto *npaType = dyn_cast(type)) { + const auto *loweredElemType = + lowerType(npaType->getElementType(), rule, loc); + // If runtime array didn't contain any hybrid types, return itself. + if (npaType->getElementType() == loweredElemType) + return npaType; + return spvContext.getNodePayloadArrayType(loweredElemType, + npaType->getNodeDecl()); + } // Pointer types could point to a hybrid type. else if (const auto *ptrType = dyn_cast(type)) { const auto *loweredPointee = @@ -1149,6 +1160,10 @@ LowerTypeVisitor::lowerStructFields(const RecordDecl *decl, spv::ImageFormat LowerTypeVisitor::translateSampledTypeToImageFormat(QualType sampledType, SourceLocation srcLoc) { + + if (spvOptions.useUnknownImageFormat) + return spv::ImageFormat::Unknown; + uint32_t elemCount = 1; QualType ty = {}; if (!isScalarType(sampledType, &ty) && @@ -1367,12 +1382,19 @@ LowerTypeVisitor::populateLayoutInformation( llvm::SmallVector loweredFields; llvm::DenseMap fieldToIndexMap; + llvm::SmallVector result; + // This stores the index of the field in the actual SPIR-V construct. // When bitfields are merged, this index will be the same for merged fields. uint32_t fieldIndexInConstruct = 0; for (size_t i = 0, iPrevious = -1; i < sortedFields.size(); iPrevious = i++) { const size_t fieldIndexForMap = loweredFields.size(); + // Can happen if sortFields runs over fields with the same register(c#) + if (!sortedFields[i]) { + return result; + } + loweredFields.emplace_back(fieldVisitor( (iPrevious < loweredFields.size() ? 
&loweredFields[iPrevious] : nullptr), @@ -1386,7 +1408,6 @@ LowerTypeVisitor::populateLayoutInformation( } // Re-order the sorted fields back to their original order. - llvm::SmallVector result; for (const auto &field : fields) result.push_back(loweredFields[fieldToIndexMap[&field]]); return result; diff --git a/tools/clang/lib/SPIRV/LowerTypeVisitor.h b/tools/clang/lib/SPIRV/LowerTypeVisitor.h index 5b26b67e3a..276e6c9232 100644 --- a/tools/clang/lib/SPIRV/LowerTypeVisitor.h +++ b/tools/clang/lib/SPIRV/LowerTypeVisitor.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_LIB_SPIRV_LOWERTYPEVISITOR_H @@ -65,6 +62,12 @@ class LowerTypeVisitor : public Visitor { return astContext.getDiagnostics().Report(srcLoc, diagId); } + // This method sorts a field list in the following order: + // - fields with register annotation first, sorted by register index. + // - then fields without annotation, in order of declaration. + std::vector + sortFields(llvm::ArrayRef fields); + /// Lowers the given Hybrid type into a SPIR-V type. /// /// Uses the above lowerType method to lower the QualType components of hybrid diff --git a/tools/clang/lib/SPIRV/PreciseVisitor.cpp b/tools/clang/lib/SPIRV/PreciseVisitor.cpp index 34e6087990..f1869318a4 100644 --- a/tools/clang/lib/SPIRV/PreciseVisitor.cpp +++ b/tools/clang/lib/SPIRV/PreciseVisitor.cpp @@ -60,6 +60,9 @@ bool isAccessingPrecise(clang::spirv::SpirvAccessChain *inst) { } else if (auto *raType = llvm::dyn_cast(baseType)) { indexes.pop(); baseType = raType->getElementType(); + } else if (auto *npaType = llvm::dyn_cast(baseType)) { + indexes.pop(); + baseType = npaType->getElementType(); } else if (auto *structType = llvm::dyn_cast(baseType)) { SpirvInstruction *index = indexes.top(); if (auto *constInt = llvm::dyn_cast(index)) { diff --git a/tools/clang/lib/SPIRV/SpirvBuilder.cpp b/tools/clang/lib/SPIRV/SpirvBuilder.cpp index 689fc0715f..22523eed0e 100644 --- a/tools/clang/lib/SPIRV/SpirvBuilder.cpp +++ b/tools/clang/lib/SPIRV/SpirvBuilder.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #include "clang/SPIRV/SpirvBuilder.h" @@ -84,7 +81,9 @@ SpirvBuilder::addFnParam(QualType ptrType, bool isPrecise, bool isNointerp, param = new (context) SpirvFunctionParameter(ptrType, isPrecise, isNointerp, loc); } - param->setStorageClass(spv::StorageClass::Function); + param->setStorageClass(hlsl::IsHLSLNodeInputType(ptrType) + ? 
spv::StorageClass::NodePayloadAMDX + : spv::StorageClass::Function); param->setDebugName(name); function->addParameter(param); return param; @@ -206,10 +205,17 @@ SpirvInstruction *SpirvBuilder::createLoad(QualType resultType, instruction->setRValue(true); if (pointer->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer) { - AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); - uint32_t align, size, stride; - std::tie(align, size) = alignmentCalc.getAlignmentAndSize( - resultType, pointer->getLayoutRule(), llvm::None, &stride); + QualType pointerType = pointer->getAstResultType(); + uint32_t align = 0; + if (!pointerType.isNull() && hlsl::IsVKBufferPointerType(pointerType)) { + align = hlsl::GetVKBufferPointerAlignment(pointerType); + } + if (!align) { + AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); + uint32_t stride; + std::tie(align, std::ignore) = alignmentCalc.getAlignmentAndSize( + resultType, pointer->getLayoutRule(), llvm::None, &stride); + } instruction->setAlignment(align); } @@ -233,6 +239,13 @@ SpirvInstruction *SpirvBuilder::createLoad(QualType resultType, createEndInvocationInterlockEXT(loc, range); } + if (context.hasLoweredType(pointer)) { + // preserve distinct node payload array types + auto *ptrType = dyn_cast(pointer->getResultType()); + instruction->setResultType(ptrType->getPointeeType()); + context.addToInstructionsWithLoweredType(instruction); + } + const auto &bitfieldInfo = pointer->getBitfieldInfo(); if (!bitfieldInfo.hasValue()) return instruction; @@ -309,6 +322,12 @@ SpirvStore *SpirvBuilder::createStore(SpirvInstruction *address, auto *instruction = new (context) SpirvStore(loc, address, source, llvm::None, range); + if (context.hasLoweredType(source)) { + // preserve distinct node payload array types + address->setResultType(context.getPointerType(source->getResultType(), + address->getStorageClass())); + context.addToInstructionsWithLoweredType(address); + } insertPoint->addInstruction(instruction); if (address->getStorageClass() == spv::StorageClass::PhysicalStorageBuffer && @@ -316,7 +335,7 @@ SpirvStore *SpirvBuilder::createStore(SpirvInstruction *address, AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); uint32_t align, size, stride; std::tie(align, size) = alignmentCalc.getAlignmentAndSize( - address->getAstResultType(), address->getLayoutRule(), llvm::None, + source->getAstResultType(), address->getLayoutRule(), llvm::None, &stride); instruction->setAlignment(align); } @@ -875,6 +894,53 @@ SpirvInstruction *SpirvBuilder::createNonSemanticDebugPrintfExtInst( return extInst; } +SpirvInstruction * +SpirvBuilder::createIsNodePayloadValid(SpirvInstruction *payloadArray, + SpirvInstruction *nodeIndex, + SourceLocation loc) { + auto *inst = new (context) + SpirvIsNodePayloadValid(astContext.BoolTy, loc, payloadArray, nodeIndex); + insertPoint->addInstruction(inst); + return inst; +} + +SpirvInstruction * +SpirvBuilder::createNodePayloadArrayLength(SpirvInstruction *payloadArray, + SourceLocation loc) { + auto *inst = new (context) + SpirvNodePayloadArrayLength(astContext.UnsignedIntTy, loc, payloadArray); + insertPoint->addInstruction(inst); + return inst; +} + +SpirvInstruction *SpirvBuilder::createAllocateNodePayloads( + QualType resultType, spv::Scope allocationScope, + SpirvInstruction *shaderIndex, SpirvInstruction *recordCount, + SourceLocation loc) { + assert(insertPoint && "null insert point"); + auto *inst = new (context) SpirvAllocateNodePayloads( + resultType, loc, allocationScope, 
shaderIndex, recordCount); + insertPoint->addInstruction(inst); + return inst; +} + +void SpirvBuilder::createEnqueueOutputNodePayloads(SpirvInstruction *payload, + SourceLocation loc) { + assert(insertPoint && "null insert point"); + auto *inst = new (context) SpirvEnqueueNodePayloads(loc, payload); + insertPoint->addInstruction(inst); +} + +SpirvInstruction * +SpirvBuilder::createFinishWritingNodePayload(SpirvInstruction *payload, + SourceLocation loc) { + assert(insertPoint && "null insert point"); + auto *inst = new (context) + SpirvFinishWritingNodePayload(astContext.BoolTy, loc, payload); + insertPoint->addInstruction(inst); + return inst; +} + void SpirvBuilder::createBarrier(spv::Scope memoryScope, spv::MemorySemanticsMask memorySemantics, llvm::Optional exec, @@ -1869,6 +1935,14 @@ SpirvConstant *SpirvBuilder::getConstantNull(QualType type) { return nullConst; } +SpirvConstant *SpirvBuilder::getConstantString(llvm::StringRef str, + bool specConst) { + // We do not care about making unique constants at this point. + auto *stringConst = new (context) SpirvConstantString(str, specConst); + mod->addConstant(stringConst); + return stringConst; +} + SpirvUndef *SpirvBuilder::getUndef(QualType type) { // We do not care about making unique constants at this point. auto *undef = new (context) SpirvUndef(type); diff --git a/tools/clang/lib/SPIRV/SpirvContext.cpp b/tools/clang/lib/SPIRV/SpirvContext.cpp index 47dfc67433..88716dddde 100644 --- a/tools/clang/lib/SPIRV/SpirvContext.cpp +++ b/tools/clang/lib/SPIRV/SpirvContext.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// #include @@ -65,6 +62,9 @@ SpirvContext::~SpirvContext() { for (auto *raType : runtimeArrayTypes) raType->~RuntimeArrayType(); + for (auto *npaType : nodePayloadArrayTypes) + npaType->~NodePayloadArrayType(); + for (auto *fnType : functionTypes) fnType->~FunctionType(); @@ -276,6 +276,19 @@ SpirvContext::getRuntimeArrayType(const SpirvType *elemType, return *(inserted.first); } +const NodePayloadArrayType * +SpirvContext::getNodePayloadArrayType(const SpirvType *elemType, + const ParmVarDecl *nodeDecl) { + NodePayloadArrayType type(elemType, nodeDecl); + auto found = nodePayloadArrayTypes.find(&type); + if (found != nodePayloadArrayTypes.end()) + return *found; + + auto inserted = nodePayloadArrayTypes.insert( + new (this) NodePayloadArrayType(elemType, nodeDecl)); + return *(inserted.first); +} + const StructType * SpirvContext::getStructType(llvm::ArrayRef fields, llvm::StringRef name, bool isReadOnly, diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index cd5f860555..734340e9ae 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. 
-// //===----------------------------------------------------------------------===// // // This file implements a SPIR-V emitter class that takes in HLSL AST and emits @@ -268,7 +265,8 @@ inline bool canActAsInParmVar(const ParmVarDecl *param) { return !param->hasAttr() && // GS output streams are marked as inout, but it should not be // used as in parameter. - !hlsl::IsHLSLStreamOutputType(param->getType()); + !hlsl::IsHLSLStreamOutputType(param->getType()) && + !hlsl::IsHLSLNodeOutputType(param->getType()); } /// Returns true if the given function parameter can act as shader stage @@ -604,8 +602,8 @@ SpirvEmitter::SpirvEmitter(CompilerInstance &ci) emitError("unknown shader module: %0", {}) << shaderModel->GetName(); if (spirvOptions.invertY && !shaderModel->IsVS() && !shaderModel->IsDS() && - !shaderModel->IsGS() && !shaderModel->IsMS()) - emitError("-fvk-invert-y can only be used in VS/DS/GS/MS", {}); + !shaderModel->IsGS() && !shaderModel->IsMS() && !shaderModel->IsLib()) + emitError("-fvk-invert-y can only be used in VS/DS/GS/MS/Lib", {}); if (spirvOptions.useGlLayout && spirvOptions.useDxLayout) emitError("cannot specify both -fvk-use-dx-layout and -fvk-use-gl-layout", @@ -1146,8 +1144,9 @@ void SpirvEmitter::doStmt(const Stmt *stmt, // All cases for expressions used as statements SpirvInstruction *result = doExpr(expr); - if (result && result->getKind() == SpirvInstruction::IK_ExecutionMode && - !attrs.empty()) { + if (result && !attrs.empty() && + (result->getKind() == SpirvInstruction::IK_ExecutionMode || + result->getKind() == SpirvInstruction::IK_ExecutionModeId)) { // Handle [[vk::ext_capability(..)]] and [[vk::ext_extension(..)]] // attributes for vk::ext_execution_mode[_id](..). createSpirvIntrInstExt( @@ -1262,6 +1261,15 @@ SpirvInstruction *SpirvEmitter::doExpr(const Expr *expr, return result; } +SpirvInstruction *SpirvEmitter::doExprEnsuringRValue(const Expr *E, + SourceLocation location, + SourceRange range) { + SpirvInstruction *I = doExpr(E); + if (I->isRValue()) + return I; + return spvBuilder.createLoad(E->getType(), I, location, range); +} + SpirvInstruction *SpirvEmitter::loadIfGLValue(const Expr *expr, SourceRange rangeOverride) { // We are trying to load the value here, which is what an LValueToRValue @@ -1274,7 +1282,8 @@ SpirvInstruction *SpirvEmitter::loadIfGLValue(const Expr *expr, } SpirvInstruction *SpirvEmitter::loadIfGLValue(const Expr *expr, - SpirvInstruction *info) { + SpirvInstruction *info, + SourceRange rangeOverride) { const auto exprType = expr->getType(); // Do nothing if this is already rvalue @@ -1309,9 +1318,11 @@ SpirvInstruction *SpirvEmitter::loadIfGLValue(const Expr *expr, return info; } + SourceRange range = + (rangeOverride != SourceRange()) ? 
rangeOverride : expr->getSourceRange(); SpirvInstruction *loadedInstr = nullptr; - loadedInstr = spvBuilder.createLoad(exprType, info, expr->getExprLoc(), - expr->getSourceRange()); + loadedInstr = + spvBuilder.createLoad(exprType, info, expr->getExprLoc(), range); assert(loadedInstr); // Special-case: According to the SPIR-V Spec: There is no physical size or @@ -1414,6 +1425,83 @@ SpirvInstruction *SpirvEmitter::castToType(SpirvInstruction *value, return nullptr; } +static bool handleDispatchGrid(SpirvContext &spvContext, + const RecordDecl *recordDecl) { + unsigned index = 0; + for (auto fieldDecl : recordDecl->fields()) { + QualType fieldType = fieldDecl->getType(); + for (const hlsl::UnusualAnnotation *it : + fieldDecl->getUnusualAnnotations()) { + if (it->getKind() == hlsl::UnusualAnnotation::UA_SemanticDecl) { + const hlsl::SemanticDecl *sd = cast(it); + if (sd->SemanticName.equals("SV_DispatchGrid")) { + spvContext.registerDispatchGridIndex(recordDecl, index); + return true; + } + } + } + if (const auto *innerType = fieldType->getAs()) { + if (handleDispatchGrid(spvContext, innerType->getDecl())) + return true; + } + ++index; + } + return false; +} + +bool SpirvEmitter::handleNodePayloadArrayType(const ParmVarDecl *decl, + SpirvInstruction *instr) { + // Because SPIR-V node payload array types are node-specific, propagate + // lowered types + switch (instr->getKind()) { + case SpirvInstruction::Kind::IK_Load: { + SpirvInstruction *ptr = dyn_cast(instr)->getPointer(); + if (handleNodePayloadArrayType(decl, ptr)) { + const SpirvPointerType *ptrType = + dyn_cast(ptr->getResultType()); + instr->setResultType(ptrType->getPointeeType()); + spvContext.addToInstructionsWithLoweredType(instr); + return true; + } + return false; + } + case SpirvInstruction::Kind::IK_FunctionParameter: + case SpirvInstruction::Kind::IK_Variable: { + QualType varType = decl->getType(); + if (hlsl::IsHLSLNodeType(varType)) { + if (auto *type = spvContext.getNodeDeclPayloadType(decl)) { + instr->setResultType( + spvContext.getPointerType(type, instr->getStorageClass())); + } else { + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, + spvBuilder); + QualType resultType = + hlsl::GetHLSLNodeIOResultType(astContext, varType); + const auto *recordType = resultType->getAs(); + assert(recordType); + if (hlsl::IsHLSLDispatchNodeInputRecordType(varType)) { + handleDispatchGrid(spvContext, recordType->getDecl()); + } + const SpirvType *elemType = lowerTypeVisitor.lowerType( + resultType, clang::spirv::SpirvLayoutRule::Scalar, llvm::None, + decl->getLocation()); + const NodePayloadArrayType *arrType = + spvContext.getNodePayloadArrayType(elemType, decl); + const SpirvType *ptrType = + spvContext.getPointerType(arrType, instr->getStorageClass()); + instr->setResultType(ptrType); + spvContext.registerNodeDeclPayloadType(arrType, decl); + } + spvContext.addToInstructionsWithLoweredType(instr); + return true; + } + return false; + } + default: + return false; + } +} + void SpirvEmitter::doFunctionDecl(const FunctionDecl *decl) { // Forward declaration of a function inside another. 
if (!decl->isThisDeclarationADefinition()) { @@ -1555,6 +1643,9 @@ void SpirvEmitter::doFunctionDecl(const FunctionDecl *decl) { QualType paramType = paramDecl->getType(); auto *param = declIdMapper.createFnParam(paramDecl, i + 1 + isNonStaticMemberFn); + if (isEntry) { + handleNodePayloadArrayType(paramDecl, param); + } #ifdef ENABLE_SPIRV_CODEGEN if (hlsl::IsVKBufferPointerType(paramType)) { Optional isRowMajor = llvm::None; @@ -2020,6 +2111,10 @@ void SpirvEmitter::doVarDecl(const VarDecl *decl) { // variables) belongs to the Function storage class. if (isExternalVar(decl)) { var = declIdMapper.createExternVar(decl); + if (decl->hasInit()) { + emitWarning("Initializer of external global will be ignored", + decl->getLocation()); + } } else { // We already know the variable is not externally visible here. If it does // not have local storage, it should be file scope variable. @@ -4304,9 +4399,7 @@ SpirvEmitter::processTextureLevelOfDetail(const CXXMemberCallExpr *expr, spvBuilder.createImageQuery(spv::Op::OpImageQueryLod, queryResultType, expr->getExprLoc(), sampledImage, coordinate); - if (spvContext.isCS()) { - addDerivativeGroupExecutionMode(); - } + addDerivativeGroupExecutionMode(); // The first component of the float2 contains the mipmap array layer. // The second component of the float2 represents the unclamped lod. return spvBuilder.createCompositeExtract(astContext.FloatTy, query, @@ -5307,6 +5400,9 @@ SpirvEmitter::doCXXMemberCallExpr(const CXXMemberCallExpr *expr) { uint32_t opcode = static_cast(hlsl::IntrinsicOp::Num_Intrinsics); if (hlsl::GetIntrinsicOp(callee, opcode, group)) { + if (group == "subscript") { + return processIntrinsicExtractRecordStruct(expr); + } return processIntrinsicMemberCall(expr, static_cast(opcode)); } @@ -5503,6 +5599,28 @@ SpirvEmitter::processIntrinsicMemberCall(const CXXMemberCallExpr *expr, return processRayQueryIntrinsics(expr, opcode); case IntrinsicOp::MOP_GetBufferContents: return processIntrinsicGetBufferContents(expr); + case hlsl::IntrinsicOp::MOP_GetThreadNodeOutputRecords: + return processIntrinsicGetNodeOutputRecords(expr, false); + case hlsl::IntrinsicOp::MOP_GetGroupNodeOutputRecords: + return processIntrinsicGetNodeOutputRecords(expr, true); + case hlsl::IntrinsicOp::MOP_ThreadIncrementOutputCount: + retVal = processIntrinsicIncrementOutputCount(expr, false); + break; + case hlsl::IntrinsicOp::MOP_GroupIncrementOutputCount: + retVal = processIntrinsicIncrementOutputCount(expr, true); + break; + case hlsl::IntrinsicOp::MOP_IsValid: + retVal = processIntrinsicIsValid(expr); + break; + case hlsl::IntrinsicOp::MOP_Count: + retVal = processIntrinsicGetRecordCount(expr); + break; + case hlsl::IntrinsicOp::MOP_OutputComplete: + processIntrinsicOutputComplete(expr); + break; + case hlsl::IntrinsicOp::MOP_FinishedCrossGroupSharing: + retVal = processIntrinsicFinishedCrossGroupSharing(expr); + break; default: emitError("intrinsic '%0' method unimplemented", expr->getCallee()->getExprLoc()) @@ -5554,7 +5672,8 @@ SpirvInstruction *SpirvEmitter::createImageSample( const bool isExplicit = lod || (grad.first && grad.second); // Implicit-lod instructions are only allowed in pixel and compute shaders. 
-  if (!spvContext.isPS() && !spvContext.isCS() && !isExplicit)
+  if (!spvContext.isPS() && !spvContext.isCS() && !spvContext.isNode() &&
+      !isExplicit)
     emitError("sampling with implicit lod is only allowed in fragment and "
               "compute shaders",
               loc);
@@ -5659,9 +5778,7 @@ SpirvEmitter::processTextureSampleGather(const CXXMemberCallExpr *expr,
   const auto retType = expr->getDirectCallee()->getReturnType();
 
   if (isSample) {
-    if (spvContext.isCS()) {
-      addDerivativeGroupExecutionMode();
-    }
+    addDerivativeGroupExecutionMode();
     return createImageSample(retType, imageType, image, sampler, coordinate,
                              /*compareVal*/ nullptr, /*bias*/ nullptr,
                              /*lod*/ nullptr, std::make_pair(nullptr, nullptr),
@@ -5749,9 +5866,9 @@ SpirvEmitter::processTextureSampleBiasLevel(const CXXMemberCallExpr *expr,
 
   const auto retType = expr->getDirectCallee()->getReturnType();
 
-  if (!lod && spvContext.isCS()) {
+  if (!lod)
     addDerivativeGroupExecutionMode();
-  }
+
   return createImageSample(
       retType, imageType, image, sampler, coordinate, /*compareVal*/ nullptr,
       bias, lod, std::make_pair(nullptr, nullptr),
@@ -5871,9 +5988,7 @@ SpirvEmitter::processTextureSampleCmp(const CXXMemberCallExpr *expr) {
   const auto retType = expr->getDirectCallee()->getReturnType();
   const auto imageType = imageExpr->getType();
 
-  if (spvContext.isCS()) {
-    addDerivativeGroupExecutionMode();
-  }
+  addDerivativeGroupExecutionMode();
 
   return createImageSample(
       retType, imageType, image, sampler, coordinate, compareVal,
@@ -5926,9 +6041,7 @@ SpirvEmitter::processTextureSampleCmpBias(const CXXMemberCallExpr *expr) {
   const auto retType = expr->getDirectCallee()->getReturnType();
   const auto imageType = imageExpr->getType();
 
-  if (spvContext.isCS()) {
-    addDerivativeGroupExecutionMode();
-  }
+  addDerivativeGroupExecutionMode();
 
   return createImageSample(
       retType, imageType, image, sampler, coordinate, compareVal, bias,
@@ -6987,6 +7100,38 @@ void SpirvEmitter::storeValue(SpirvInstruction *lhsPtr,
   }
 }
 
+bool SpirvEmitter::canUseOpCopyLogical(QualType type) const {
+  if (featureManager.getSpirvVersion(featureManager.getTargetEnv()) <
+      VersionTuple(1, 4)) {
+    return false;
+  }
+
+  if (!type->isArrayType() && !type->isRecordType()) {
+    return false;
+  }
+
+  if (const auto *recordType = type->getAs<RecordType>()) {
+    if (isTypeInVkNamespace(recordType) &&
+        (recordType->getDecl()->getName().equals("BufferPointer") ||
+         recordType->getDecl()->getName().equals("SpirvType") ||
+         recordType->getDecl()->getName().equals("SpirvOpaqueType"))) {
+      // vk::BufferPointer lowers to a pointer type. No need to reconstruct
+      // the value. The vk::Spirv*Type should be treated as an opaque type.
+      // All we can do is leave it the same.
+      return false;
+    }
+  }
+
+  if (hlsl::IsHLSLVecMatType(type) || hlsl::IsHLSLResourceType(type)) {
+    return false;
+  }
+
+  // If the type contains a bool it is possible that one type represents it
+  // with a bool and the other with an int. If that happens, OpCopyLogical is
+  // not valid.
+  return !isOrContainsBoolType(type);
+}
+
 SpirvInstruction *SpirvEmitter::reconstructValue(SpirvInstruction *srcVal,
                                                  const QualType valType,
                                                  SpirvLayoutRule dstLR,
@@ -7050,6 +7195,13 @@ SpirvInstruction *SpirvEmitter::reconstructValue(SpirvInstruction *srcVal,
     return result;
   };
 
+  if (canUseOpCopyLogical(valType)) {
+    SpirvInstruction *copy = spvBuilder.createUnaryOp(
+        spv::Op::OpCopyLogical, valType, srcVal, srcVal->getSourceLocation());
+    copy->setLayoutRule(dstLR);
+    return copy;
+  }
+
   // Constant arrays
   if (const auto *arrayType = astContext.getAsConstantArrayType(valType)) {
     const auto elemType = arrayType->getElementType();
@@ -7080,14 +7232,17 @@ SpirvInstruction *SpirvEmitter::reconstructValue(SpirvInstruction *srcVal,
 
   // Structs
   if (const auto *recordType = valType->getAs<RecordType>()) {
-    assert(recordType->isStructureType());
-
     if (isTypeInVkNamespace(recordType) &&
-        recordType->getDecl()->getName().equals("BufferPointer")) {
-      // Uniquely among structs, vk::BufferPointer lowers to a pointer type.
+        (recordType->getDecl()->getName().equals("BufferPointer") ||
+         recordType->getDecl()->getName().equals("SpirvType") ||
+         recordType->getDecl()->getName().equals("SpirvOpaqueType"))) {
+      // vk::BufferPointer lowers to a pointer type. No need to reconstruct
+      // the value. The vk::Spirv*Type should be treated as an opaque type.
+      // All we can do is leave it the same.
       return srcVal;
     }
+    assert(recordType->isStructureType());
     LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions,
                                       spvBuilder);
     const StructType *spirvStructType =
@@ -7955,15 +8110,12 @@ SpirvInstruction *SpirvEmitter::tryToAssignToVectorElements(
   }
 
   auto *vec1 = doExpr(base, range);
-  auto *vec1Val =
-      vec1->isRValue()
-          ? vec1
-          : spvBuilder.createLoad(baseType, vec1, base->getLocStart(), range);
+  auto *vec1Val = vec1->isRValue() ? vec1 : loadIfGLValue(base, vec1, range);
   auto *shuffle = spvBuilder.createVectorShuffle(
       baseType, vec1Val, rhs, selectors, lhs->getLocStart(), range);
 
   if (!tryToAssignToRWBufferRWTexture(base, shuffle))
-    spvBuilder.createStore(vec1, shuffle, lhs->getLocStart(), range);
+    storeValue(vec1, shuffle, base->getType(), lhs->getLocStart(), range);
 
   // TODO: OK, this return value is incorrect for compound assignments, for
   // which cases we should return lvalues. Should at least emit errors if
@@ -8633,9 +8785,10 @@ const Expr *SpirvEmitter::collectArrayStructIndices(
   }
 
   {
-    // Indexing into ConstantBuffers and TextureBuffers involves an additional
-    // FlatConversion node which casts the handle to the underlying structure
-    // type. We can look past the FlatConversion to continue to collect indices.
+    // Indexing into ConstantBuffers, TextureBuffers, and node input/output
+    // types involves an additional FlatConversion node which casts the handle
+    // to the underlying structure type. We can look past the FlatConversion to
+    // continue to collect indices.
// For example: MyConstantBufferArray[0].structMember1 // `-MemberExpr .structMember1 // `-ImplicitCastExpr 'const T' lvalue @@ -8644,7 +8797,8 @@ const Expr *SpirvEmitter::collectArrayStructIndices( if (castExpr->getCastKind() == CK_FlatConversion) { const auto *subExpr = castExpr->getSubExpr(); const QualType subExprType = subExpr->getType(); - if (isConstantTextureBuffer(subExprType)) { + if (isConstantTextureBuffer(subExprType) || + hlsl::IsHLSLNodeType(subExprType)) { return collectArrayStructIndices(subExpr, rawIndex, rawIndices, indices, isMSOutAttribute); } @@ -9046,6 +9200,9 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { case hlsl::IntrinsicOp::IOP_udot: retVal = processIntrinsicDot(callExpr); break; + case hlsl::IntrinsicOp::IOP_Barrier: + retVal = processIntrinsicBarrier(callExpr); + break; case hlsl::IntrinsicOp::IOP_GroupMemoryBarrier: retVal = processIntrinsicMemoryBarrier(callExpr, /*isDevice*/ false, @@ -9078,6 +9235,9 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { /*groupSync*/ true, /*isAllBarrier*/ true); break; + case hlsl::IntrinsicOp::IOP_GetRemainingRecursionLevels: + retVal = processIntrinsicGetRemainingRecursionLevels(callExpr); + break; case hlsl::IntrinsicOp::IOP_CheckAccessFullyMapped: retVal = spvBuilder.createImageSparseTexelsResident( doExpr(callExpr->getArg(0)), srcLoc, srcRange); @@ -9161,10 +9321,10 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { retVal = processRawBufferStore(callExpr); break; case hlsl::IntrinsicOp::IOP_Vkext_execution_mode: - retVal = processIntrinsicExecutionMode(callExpr, false); + retVal = processIntrinsicExecutionMode(callExpr); break; case hlsl::IntrinsicOp::IOP_Vkext_execution_mode_id: - retVal = processIntrinsicExecutionMode(callExpr, true); + retVal = processIntrinsicExecutionModeId(callExpr); break; case hlsl::IntrinsicOp::IOP_saturate: retVal = processIntrinsicSaturate(callExpr); @@ -9483,12 +9643,17 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { retVal = processIntrinsicPointerCast(callExpr, true); break; } - INTRINSIC_SPIRV_OP_CASE(ddx, DPdx, true); - INTRINSIC_SPIRV_OP_CASE(ddx_coarse, DPdxCoarse, false); - INTRINSIC_SPIRV_OP_CASE(ddx_fine, DPdxFine, false); - INTRINSIC_SPIRV_OP_CASE(ddy, DPdy, true); - INTRINSIC_SPIRV_OP_CASE(ddy_coarse, DPdyCoarse, false); - INTRINSIC_SPIRV_OP_CASE(ddy_fine, DPdyFine, false); + case hlsl::IntrinsicOp::IOP_ddx: + case hlsl::IntrinsicOp::IOP_ddx_coarse: + case hlsl::IntrinsicOp::IOP_ddx_fine: + case hlsl::IntrinsicOp::IOP_ddy: + case hlsl::IntrinsicOp::IOP_ddy_coarse: + case hlsl::IntrinsicOp::IOP_ddy_fine: { + retVal = processDerivativeIntrinsic(hlslOpcode, callExpr->getArg(0), + callExpr->getExprLoc(), + callExpr->getSourceRange()); + break; + } INTRINSIC_SPIRV_OP_CASE(countbits, BitCount, false); INTRINSIC_SPIRV_OP_CASE(fmod, FRem, true); INTRINSIC_SPIRV_OP_CASE(fwidth, Fwidth, true); @@ -9549,6 +9714,15 @@ SpirvEmitter::processIntrinsicCallExpr(const CallExpr *callExpr) { return retVal; } +SpirvInstruction *SpirvEmitter::processIntrinsicGetRecordCount( + const CXXMemberCallExpr *callExpr) { + assert(callExpr->getNumArgs() == 0); + const auto obj = callExpr->getImplicitObjectArgument(); + const auto loc = callExpr->getExprLoc(); + SpirvInstruction *payload = doExpr(obj); + return spvBuilder.createNodePayloadArrayLength(payload, loc); +} + SpirvInstruction * SpirvEmitter::processIntrinsicFirstbit(const CallExpr *callExpr, GLSLstd450 glslOpcode) { @@ -9571,6 +9745,76 @@ 
SpirvEmitter::processIntrinsicFirstbit(const CallExpr *callExpr, srcRange); } +SpirvInstruction *SpirvEmitter::processMatrixDerivativeIntrinsic( + hlsl::IntrinsicOp hlslOpcode, const Expr *arg, SourceLocation loc, + SourceRange range) { + const auto actOnEachVec = [this, hlslOpcode, loc, range]( + uint32_t /*index*/, QualType inType, + QualType outType, SpirvInstruction *curRow) { + return processDerivativeIntrinsic(hlslOpcode, curRow, loc, range); + }; + + return processEachVectorInMatrix(arg, arg->getType(), doExpr(arg), + actOnEachVec, loc, range); +} + +SpirvInstruction * +SpirvEmitter::processDerivativeIntrinsic(hlsl::IntrinsicOp hlslOpcode, + const Expr *arg, SourceLocation loc, + SourceRange range) { + if (isMxNMatrix(arg->getType())) { + return processMatrixDerivativeIntrinsic(hlslOpcode, arg, loc, range); + } + return processDerivativeIntrinsic(hlslOpcode, doExpr(arg), loc, range); +} + +SpirvInstruction *SpirvEmitter::processDerivativeIntrinsic( + hlsl::IntrinsicOp hlslOpcode, SpirvInstruction *arg, SourceLocation loc, + SourceRange range) { + QualType returnType = arg->getAstResultType(); + assert(isFloatOrVecOfFloatType(returnType)); + + addDerivativeGroupExecutionMode(); + needsLegalization = true; + + QualType B32Type = astContext.FloatTy; + uint32_t vectorSize = 0; + QualType elementType = returnType; + if (isVectorType(returnType, &elementType, &vectorSize)) { + B32Type = astContext.getExtVectorType(B32Type, vectorSize); + } + + // Derivative operations work on 32-bit floats only. Cast to 32-bit if needed. + SpirvInstruction *operand = castToType(arg, returnType, B32Type, loc, range); + + spv::Op opcode = spv::Op::OpNop; + switch (hlslOpcode) { + case hlsl::IntrinsicOp::IOP_ddx: + opcode = spv::Op::OpDPdx; + break; + case hlsl::IntrinsicOp::IOP_ddx_coarse: + opcode = spv::Op::OpDPdxCoarse; + break; + case hlsl::IntrinsicOp::IOP_ddx_fine: + opcode = spv::Op::OpDPdxFine; + break; + case hlsl::IntrinsicOp::IOP_ddy: + opcode = spv::Op::OpDPdy; + break; + case hlsl::IntrinsicOp::IOP_ddy_coarse: + opcode = spv::Op::OpDPdyCoarse; + break; + case hlsl::IntrinsicOp::IOP_ddy_fine: + opcode = spv::Op::OpDPdyFine; + break; + }; + + SpirvInstruction *result = + spvBuilder.createUnaryOp(opcode, B32Type, operand, loc, range); + result = castToType(result, B32Type, returnType, loc, range); + return result; +} + // Returns true is the given expression can be used as an output parameter. // // Warning: this function could return false negatives. 
@@ -10926,38 +11170,202 @@ SpirvEmitter::processIntrinsicPointerCast(const CallExpr *callExpr, SpirvInstruction *SpirvEmitter::processIntrinsicGetBufferContents( const CXXMemberCallExpr *callExpr) { - LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, - spvBuilder); - Expr *obj = callExpr->getImplicitObjectArgument(); - SpirvInstruction *bufferPointer = doExpr(obj); + SpirvInstruction *bufferPointer = + doExpr(callExpr->getImplicitObjectArgument()); if (!bufferPointer) return nullptr; - if (bufferPointer->isRValue()) { - bufferPointer->setRValue(false); - bufferPointer->setStorageClass(spv::StorageClass::PhysicalStorageBuffer); - return bufferPointer; - } - - unsigned align = hlsl::GetVKBufferPointerAlignment(obj->getType()); - lowerTypeVisitor.visitInstruction(bufferPointer); - - const SpirvPointerType *bufferPointerType = - dyn_cast(bufferPointer->getResultType()); - SpirvLoad *retVal = - spvBuilder.createLoad(bufferPointerType->getPointeeType(), bufferPointer, - callExpr->getLocStart()); - if (!align) { - QualType bufferType = hlsl::GetVKBufferPointerBufferType(obj->getType()); - AlignmentSizeCalculator alignmentCalc(astContext, spirvOptions); - uint32_t stride; - std::tie(align, std::ignore) = alignmentCalc.getAlignmentAndSize( - bufferType, retVal->getLayoutRule(), llvm::None, &stride); - } - retVal->setAlignment(align); + + SpirvInstruction *retVal = + bufferPointer->isRValue() + ? bufferPointer + : spvBuilder.createLoad(bufferPointer->getAstResultType(), + bufferPointer, callExpr->getLocStart()); retVal->setRValue(false); + retVal->setStorageClass(spv::StorageClass::PhysicalStorageBuffer); + retVal->setLayoutRule(spirvOptions.sBufferLayoutRule); return retVal; } +SpirvInstruction *SpirvEmitter::processIntrinsicExtractRecordStruct( + const CXXMemberCallExpr *callExpr) { + Expr *obj = callExpr->getImplicitObjectArgument(); + QualType objType = obj->getType(); + unsigned n = callExpr->getNumArgs(); + assert(hlsl::IsHLSLNodeType(objType)); + assert(n == 0 || n == 1 && hlsl::IsHLSLNodeRecordArrayType(objType)); + + QualType recordType = hlsl::GetHLSLNodeIOResultType(astContext, objType); + SpirvInstruction *res = doExpr(obj); + SpirvInstruction *index = + n ? 
doExpr(callExpr->getArg(0)) + : spvBuilder.getConstantInt(astContext.UnsignedIntTy, + llvm::APInt(32, 0)); + res->setLayoutRule(SpirvLayoutRule::Scalar); + + return spvBuilder.createAccessChain(recordType, res, {index}, + callExpr->getExprLoc(), + callExpr->getSourceRange()); +} + +SpirvInstruction *SpirvEmitter::processIntrinsicGetRemainingRecursionLevels( + const CallExpr *callExpr) { + assert(callExpr->getNumArgs() == 0); + const auto loc = callExpr->getExprLoc(); + const QualType retType = callExpr->getCallReturnType(astContext); + auto *var = declIdMapper.getBuiltinVar( + spv::BuiltIn::RemainingRecursionLevelsAMDX, retType, loc); + return spvBuilder.createLoad(retType, var, loc); +} + +SpirvInstruction * +SpirvEmitter::processIntrinsicIsValid(const CXXMemberCallExpr *callExpr) { + assert(callExpr->getNumArgs() == 0); + const auto loc = callExpr->getExprLoc(); + const Expr *nodeOutputExpr = callExpr->getImplicitObjectArgument(); + Expr *baseExpr = const_cast(nodeOutputExpr); + SpirvInstruction *shaderIndex = nullptr; + + if (const auto subExpr = dyn_cast_or_null( + nodeOutputExpr->IgnoreParenNoopCasts(astContext))) { + if (subExpr->getOperator() == OverloadedOperatorKind::OO_Subscript) { + // special case: offset shader index by the array subscript + shaderIndex = doExpr(subExpr->getArg(1)); + baseExpr = const_cast(subExpr->getArg(0)); + } + } + + const auto *declRefExpr = dyn_cast(baseExpr->IgnoreImpCasts()); + const auto *paramDecl = dyn_cast(declRefExpr->getDecl()); + int nodeIndex = 0; + if (HLSLNodeIdAttr *nodeId = paramDecl->getAttr()) { + nodeIndex = nodeId->getArrayIndex(); + } + + SpirvInstruction *payload = doExpr(baseExpr); + if (!shaderIndex) { + shaderIndex = spvBuilder.getConstantInt(astContext.UnsignedIntTy, + llvm::APInt(32, nodeIndex)); + } + + return spvBuilder.createIsNodePayloadValid(payload, shaderIndex, loc); +} + +SpirvInstruction *SpirvEmitter::processIntrinsicGetNodeOutputRecords( + const CXXMemberCallExpr *callExpr, bool isGroupShared) { + assert(callExpr->getNumArgs() == 1); + const auto loc = callExpr->getExprLoc(); + const Expr *nodeOutputExpr = callExpr->getImplicitObjectArgument(); + Expr *baseExpr = const_cast(nodeOutputExpr); + SpirvInstruction *shaderIndex = nullptr; + + if (const auto subExpr = dyn_cast_or_null( + nodeOutputExpr->IgnoreParenNoopCasts(astContext))) { + if (subExpr->getOperator() == OverloadedOperatorKind::OO_Subscript) { + // special case: offset shader index by the array subscript + shaderIndex = doExpr(subExpr->getArg(1)); + baseExpr = const_cast(subExpr->getArg(0)); + } + } + + const auto *declRefExpr = dyn_cast(baseExpr->IgnoreImpCasts()); + const auto *paramDecl = dyn_cast(declRefExpr->getDecl()); + if (!shaderIndex) { + shaderIndex = + spvBuilder.getConstantInt(astContext.UnsignedIntTy, llvm::APInt(32, 0)); + } + + LowerTypeVisitor lowerTypeVisitor(astContext, spvContext, spirvOptions, + spvBuilder); + const SpirvType *elemType = lowerTypeVisitor.lowerType( + hlsl::GetHLSLNodeIOResultType(astContext, baseExpr->getType()), + clang::spirv::SpirvLayoutRule::Scalar, llvm::None, + paramDecl->getLocation()); + const SpirvType *payloadType = spvContext.getPointerType( + spvContext.getNodePayloadArrayType(elemType, paramDecl), + spv::StorageClass::NodePayloadAMDX); + + spv::Scope scope = + isGroupShared ? 
spv::Scope::Workgroup : spv::Scope::Invocation; + SpirvInstruction *recordCount = doExpr(callExpr->getArg(0)); + SpirvInstruction *result = spvBuilder.createAllocateNodePayloads( + callExpr->getType(), scope, shaderIndex, recordCount, loc); + result->setResultType(payloadType); + spvContext.addToInstructionsWithLoweredType(result); + return result; +} + +SpirvInstruction *SpirvEmitter::processIntrinsicIncrementOutputCount( + const CXXMemberCallExpr *callExpr, bool isGroupShared) { + return processIntrinsicGetNodeOutputRecords(callExpr, isGroupShared); +} + +void SpirvEmitter::processIntrinsicOutputComplete( + const CXXMemberCallExpr *callExpr) { + Expr *payloadExpr = + callExpr->getImplicitObjectArgument()->IgnoreParenNoopCasts(astContext); + SpirvInstruction *payload = doExpr(payloadExpr); + spvBuilder.createEnqueueOutputNodePayloads(payload, callExpr->getExprLoc()); +} + +SpirvInstruction *SpirvEmitter::processIntrinsicFinishedCrossGroupSharing( + const CXXMemberCallExpr *callExpr) { + Expr *payloadExpr = callExpr->getImplicitObjectArgument(); + SpirvInstruction *payload = doExpr(payloadExpr); + return spvBuilder.createFinishWritingNodePayload(payload, + callExpr->getExprLoc()); +} + +SpirvInstruction * +SpirvEmitter::processIntrinsicBarrier(const CallExpr *callExpr) { + llvm::APSInt a1(32, true), a2(32, true); + int64_t i1, i2; + const Expr *e1 = callExpr->getArg(0), *e2 = callExpr->getArg(1); + + // object as first argument + if (!e1->EvaluateAsInt(a1, astContext)) { + assert(e1->getType()->isStructureOrClassType()); + a1.setAllBits(); + } + + if (e2->EvaluateAsInt(a2, astContext) && (i1 = a1.getExtValue()) >= 0 && + (i2 = a2.getExtValue()) >= 0) { + } else { + emitError("Barrier arguments must be non-negative integer constants", + callExpr->getExprLoc()); + return nullptr; + } + + if (!(i1 | i2)) { // all zero -> no-op + return nullptr; + } + + spv::Scope memScope = + (i2 & (unsigned)hlsl::DXIL::BarrierSemanticFlag::DeviceScope) + ? spv::Scope::Device + : (i2 & (unsigned)hlsl::DXIL::BarrierSemanticFlag::GroupScope) + ? spv::Scope::Workgroup + : spv::Scope::Invocation; + spv::MemorySemanticsMask memSemaMask = + spv::MemorySemanticsMask::AcquireRelease | + ((i1 & (unsigned)hlsl::DXIL::MemoryTypeFlag::UavMemory) + ? spv::MemorySemanticsMask::UniformMemory + : spv::MemorySemanticsMask::MaskNone) | + ((i1 & (unsigned)hlsl::DXIL::MemoryTypeFlag::GroupSharedMemory) + ? spv::MemorySemanticsMask::WorkgroupMemory + : spv::MemorySemanticsMask::MaskNone) | + ((i1 & (unsigned)hlsl::DXIL::MemoryTypeFlag::NodeOutputMemory) + ? spv::MemorySemanticsMask::OutputMemory + : spv::MemorySemanticsMask::MaskNone); + Optional execScope = + (i2 & (unsigned)hlsl::DXIL::BarrierSemanticFlag::GroupSync) + ? 
Optional(spv::Scope::Workgroup) + : None; + + spvBuilder.createBarrier(memScope, memSemaMask, execScope, + callExpr->getExprLoc()); + return nullptr; +} + SpirvInstruction * SpirvEmitter::processIntrinsicMemoryBarrier(const CallExpr *callExpr, bool isDevice, bool groupSync, @@ -11283,8 +11691,8 @@ SpirvInstruction *SpirvEmitter::processIntrinsicMul(const CallExpr *callExpr) { uint32_t numRows = 0; if (isMxNMatrix(returnType, &elemType, &numRows)) { llvm::SmallVector rows; - auto *arg0Id = doExpr(arg0); - auto *arg1Id = doExpr(arg1); + auto *arg0Id = doExprEnsuringRValue(arg0, loc, range); + auto *arg1Id = doExprEnsuringRValue(arg1, loc, range); for (uint32_t i = 0; i < numRows; ++i) { auto *scalar = spvBuilder.createCompositeExtract(elemType, arg0Id, {i}, loc, range); @@ -11299,8 +11707,8 @@ SpirvInstruction *SpirvEmitter::processIntrinsicMul(const CallExpr *callExpr) { } // All the following cases require handling arg0 and arg1 expressions first. - auto *arg0Id = doExpr(arg0); - auto *arg1Id = doExpr(arg1); + auto *arg0Id = doExprEnsuringRValue(arg0, loc, range); + auto *arg1Id = doExprEnsuringRValue(arg1, loc, range); // mul(scalar, scalar) if (isScalarType(arg0Type) && isScalarType(arg1Type)) @@ -12095,8 +12503,7 @@ SpirvInstruction *SpirvEmitter::processIntrinsicUsingSpirvInst( case spv::Op::OpFwidth: case spv::Op::OpFwidthFine: case spv::Op::OpFwidthCoarse: - if (spvContext.isCS()) - addDerivativeGroupExecutionMode(); + addDerivativeGroupExecutionMode(); needsLegalization = true; break; default: @@ -12931,7 +13338,7 @@ void SpirvEmitter::processDispatchMesh(const CallExpr *callExpr) { : spv::StorageClass::Output; auto *payloadArg = doExpr(args[3]); bool isValid = false; - const VarDecl *param = nullptr; + SpirvInstruction *param = nullptr; if (const auto *implCastExpr = dyn_cast(args[3])) { if (const auto *arg = dyn_cast(implCastExpr->getSubExpr())) { if (const auto *paramDecl = dyn_cast(arg->getDecl())) { @@ -12939,7 +13346,8 @@ void SpirvEmitter::processDispatchMesh(const CallExpr *callExpr) { isValid = declIdMapper.createPayloadStageVars( sigPoint, sc, paramDecl, /*asInput=*/false, paramDecl->getType(), "out.var", &payloadArg); - param = paramDecl; + param = + declIdMapper.getDeclEvalInfo(paramDecl, paramDecl->getLocation()); } } } @@ -12956,7 +13364,7 @@ void SpirvEmitter::processDispatchMesh(const CallExpr *callExpr) { if (featureManager.isExtensionEnabled(Extension::EXT_mesh_shader)) { // for EXT_mesh_shader, create opEmitMeshTasksEXT. - spvBuilder.createEmitMeshTasksEXT(threadX, threadY, threadZ, loc, nullptr, + spvBuilder.createEmitMeshTasksEXT(threadX, threadY, threadZ, loc, param, range); } else { // for NV_mesh_shader, set TaskCountNV = threadX * threadY * threadZ. 
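// --- Editor's illustrative aside (not part of this patch) ---------------------
// A standalone sketch of how the Barrier intrinsic's two constant arguments are
// decoded in processIntrinsicBarrier above: the semantic flags pick the memory
// scope (and an optional execution barrier), the memory-type flags pick the
// memory-semantics bits, and AcquireRelease is always included. The bit values
// below are illustrative assumptions, not the actual hlsl::DXIL constants.
#include <cstdint>
#include <cstdio>

// Assumed stand-ins for hlsl::DXIL::MemoryTypeFlag / BarrierSemanticFlag.
constexpr uint32_t kUavMemory = 0x1, kGroupSharedMemory = 0x2, kNodeOutputMemory = 0x8;
constexpr uint32_t kGroupSync = 0x1, kGroupScope = 0x2, kDeviceScope = 0x4;

struct BarrierLowering {
  const char *memoryScope;  // widest requested memory scope
  bool uniformMemory;       // UAV accesses included in the semantics mask
  bool workgroupMemory;     // groupshared accesses included
  bool outputMemory;        // node output payloads included
  bool hasExecutionBarrier; // GroupSync also synchronizes execution
};

// Mirrors the selection order in the hunk: Device > Workgroup > Invocation.
BarrierLowering lowerBarrier(uint32_t memoryTypeFlags, uint32_t semanticFlags) {
  BarrierLowering out{};
  out.memoryScope = (semanticFlags & kDeviceScope)  ? "Device"
                    : (semanticFlags & kGroupScope) ? "Workgroup"
                                                    : "Invocation";
  out.uniformMemory = memoryTypeFlags & kUavMemory;
  out.workgroupMemory = memoryTypeFlags & kGroupSharedMemory;
  out.outputMemory = memoryTypeFlags & kNodeOutputMemory;
  out.hasExecutionBarrier = semanticFlags & kGroupSync;
  return out;
}

int main() {
  BarrierLowering l = lowerBarrier(kGroupSharedMemory, kGroupScope | kGroupSync);
  std::printf("scope=%s workgroupMem=%d execBarrier=%d\n", l.memoryScope,
              l.workgroupMemory, l.hasExecutionBarrier);
}
// --- End editor's aside -------------------------------------------------------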
@@ -13180,6 +13588,7 @@ hlsl::ShaderModel::Kind SpirvEmitter::getShaderModelKind(StringRef stageName) { .Case("callable", hlsl::ShaderModel::Kind::Callable) .Case("mesh", hlsl::ShaderModel::Kind::Mesh) .Case("amplification", hlsl::ShaderModel::Kind::Amplification) + .Case("node", hlsl::ShaderModel::Kind::Node) .Default(hlsl::ShaderModel::Kind::Invalid); assert(SMK != hlsl::ShaderModel::Kind::Invalid); return SMK; @@ -13200,6 +13609,7 @@ SpirvEmitter::getSpirvShaderStage(hlsl::ShaderModel::Kind smk, case hlsl::ShaderModel::Kind::Pixel: return spv::ExecutionModel::Fragment; case hlsl::ShaderModel::Kind::Compute: + case hlsl::ShaderModel::Kind::Node: return spv::ExecutionModel::GLCompute; case hlsl::ShaderModel::Kind::RayGeneration: return spv::ExecutionModel::RayGenerationNV; @@ -13420,6 +13830,21 @@ void SpirvEmitter::processPixelShaderAttributes(const FunctionDecl *decl) { } } +void SpirvEmitter::checkForWaveSizeAttr(const FunctionDecl *decl) { + if (auto *waveSizeAttr = decl->getAttr()) { + // Not supported in Vulkan SPIR-V, warn and ignore. + + // SPIR-V SubgroupSize execution mode would work but it is Kernel only + // (requires the SubgroupDispatch capability, which implies the + // DeviceEnqueue capability, which is Kernel only). Subgroup sizes can be + // specified in Vulkan on the application side via + // VK_EXT_subgroup_size_control. + emitWarning("Wave size is not supported by Vulkan SPIR-V. Consider using " + "VK_EXT_subgroup_size_control.", + waveSizeAttr->getLocation()); + } +} + void SpirvEmitter::processComputeShaderAttributes(const FunctionDecl *decl) { auto *numThreadsAttr = decl->getAttr(); assert(numThreadsAttr && "thread group size missing from entry-point"); @@ -13431,19 +13856,82 @@ void SpirvEmitter::processComputeShaderAttributes(const FunctionDecl *decl) { spvBuilder.addExecutionMode(entryFunction, spv::ExecutionMode::LocalSize, {x, y, z}, decl->getLocation()); - auto *waveSizeAttr = decl->getAttr(); - if (waveSizeAttr) { - // Not supported in Vulkan SPIR-V, warn and ignore. + checkForWaveSizeAttr(decl); +} - // SPIR-V SubgroupSize execution mode would work but it is Kernel only - // (requires the SubgroupDispatch capability, which implies the - // DeviceEnqueue capability, which is Kernel only). Subgroup sizes can be - // specified in Vulkan on the application side via - // VK_EXT_subgroup_size_control. - emitWarning("Wave size is not supported by Vulkan SPIR-V. Consider using " - "VK_EXT_subgroup_size_control.", - waveSizeAttr->getLocation()); +void SpirvEmitter::processNodeShaderAttributes(const FunctionDecl *decl) { + uint32_t x = 1, y = 1, z = 1; + if (auto *numThreadsAttr = decl->getAttr()) { + x = static_cast(numThreadsAttr->getX()); + y = static_cast(numThreadsAttr->getY()); + z = static_cast(numThreadsAttr->getZ()); + } + spvBuilder.addExecutionMode(entryFunction, spv::ExecutionMode::LocalSize, + {x, y, z}, decl->getLocation()); + + auto *nodeLaunchAttr = decl->getAttr(); + StringRef launchType = nodeLaunchAttr ? 
nodeLaunchAttr->getLaunchType() : ""; + if (launchType.equals("coalescing") || launchType.equals("thread")) { + spvBuilder.addExecutionMode(entryFunction, + spv::ExecutionMode::CoalescingAMDX, {}, + decl->getLocation()); } + + uint64_t nodeId = 0; + if (const auto nodeIdAttr = decl->getAttr()) + nodeId = static_cast(nodeIdAttr->getArrayIndex()); + spvBuilder.addExecutionModeId( + entryFunction, spv::ExecutionMode::ShaderIndexAMDX, + {spvBuilder.getConstantInt(astContext.UnsignedIntTy, + llvm::APInt(32, nodeId))}, + decl->getLocation()); + + if (const auto *nodeMaxRecursionDepthAttr = + decl->getAttr()) { + SpirvInstruction *count = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, + llvm::APInt(32, nodeMaxRecursionDepthAttr->getCount())); + spvBuilder.addExecutionModeId(entryFunction, + spv::ExecutionMode::MaxNodeRecursionAMDX, + {count}, decl->getLocation()); + } + + if (const auto *nodeShareInputOfAttr = + decl->getAttr()) { + SpirvInstruction *name = + spvBuilder.getConstantString(nodeShareInputOfAttr->getName()); + SpirvInstruction *index = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, + llvm::APInt(32, nodeShareInputOfAttr->getArrayIndex())); + spvBuilder.addExecutionModeId(entryFunction, + spv::ExecutionMode::SharesInputWithAMDX, + {name, index}, decl->getLocation()); + } + + if (const auto *dispatchGrid = decl->getAttr()) { + SpirvInstruction *gridX = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, llvm::APInt(32, dispatchGrid->getX())); + SpirvInstruction *gridY = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, llvm::APInt(32, dispatchGrid->getY())); + SpirvInstruction *gridZ = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, llvm::APInt(32, dispatchGrid->getZ())); + spvBuilder.addExecutionModeId(entryFunction, + spv::ExecutionMode::StaticNumWorkgroupsAMDX, + {gridX, gridY, gridZ}, decl->getLocation()); + } else if (const auto *maxDispatchGrid = + decl->getAttr()) { + SpirvInstruction *gridX = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, llvm::APInt(32, maxDispatchGrid->getX())); + SpirvInstruction *gridY = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, llvm::APInt(32, maxDispatchGrid->getY())); + SpirvInstruction *gridZ = spvBuilder.getConstantInt( + astContext.UnsignedIntTy, llvm::APInt(32, maxDispatchGrid->getZ())); + spvBuilder.addExecutionModeId(entryFunction, + spv::ExecutionMode::MaxNumWorkgroupsAMDX, + {gridX, gridY, gridZ}, decl->getLocation()); + } + + checkForWaveSizeAttr(decl); } bool SpirvEmitter::processTessellationShaderAttributes( @@ -13535,8 +14023,8 @@ bool SpirvEmitter::processTessellationShaderAttributes( } bool SpirvEmitter::emitEntryFunctionWrapperForRayTracing( - const FunctionDecl *decl, SpirvDebugFunction *debugFunction, - SpirvFunction *entryFuncInstr) { + const FunctionDecl *decl, RichDebugInfo **info, + SpirvDebugFunction *debugFunction, SpirvFunction *entryFuncInstr) { // The entry basic block. 
auto *entryLabel = spvBuilder.createBasicBlock(); spvBuilder.setInsertPoint(entryLabel); @@ -13645,6 +14133,10 @@ bool SpirvEmitter::emitEntryFunctionWrapperForRayTracing( spvBuilder.createReturn(decl->getBody()->getLocEnd()); spvBuilder.endFunction(); + if (spirvOptions.debugInfoRich && decl->hasBody()) { + spvContext.popDebugLexicalScope(*info); + } + return true; } @@ -13859,7 +14351,9 @@ SpirvFunction *SpirvEmitter::emitEntryFunctionWrapper( astContext.VoidTy, decl->getLocStart(), decl->getName()); if (spirvOptions.debugInfoRich && decl->hasBody()) { - *debugFunction = emitDebugFunction(decl, entryFunction, info, "wrapper"); + *debugFunction = + emitDebugFunction(decl, entryFunction, info, "__dxc_setup"); + spvContext.pushDebugLexicalScope(*info, *debugFunction); } // Specify that entryFunction is an entry function wrapper. @@ -13876,7 +14370,7 @@ SpirvFunction *SpirvEmitter::emitEntryFunctionWrapper( entryInfo->entryFunction = entryFunction; if (spvContext.isRay()) { - return emitEntryFunctionWrapperForRayTracing(decl, *debugFunction, + return emitEntryFunctionWrapperForRayTracing(decl, info, *debugFunction, entryFuncInstr) ? entryFunction : nullptr; @@ -13886,6 +14380,8 @@ SpirvFunction *SpirvEmitter::emitEntryFunctionWrapper( processPixelShaderAttributes(decl); } else if (spvContext.isCS()) { processComputeShaderAttributes(decl); + } else if (spvContext.isNode()) { + processNodeShaderAttributes(decl); } else if (spvContext.isHS()) { if (!processTessellationShaderAttributes(decl, &numOutputControlPoints)) return nullptr; @@ -13994,12 +14490,23 @@ SpirvFunction *SpirvEmitter::emitEntryFunctionWrapper( llvm::SmallVector params; for (const auto *param : decl->params()) { const auto paramType = param->getType(); + if (hlsl::IsHLSLNodeInputType(paramType)) { + SpirvInstruction *value = nullptr; + if (!declIdMapper.createStageInputVar(param, &value, false)) + return nullptr; + if (value && value->getKind() == SpirvInstruction::Kind::IK_Variable) { + handleNodePayloadArrayType(param, value); + params.push_back(value); + } + continue; + } + std::string tempVarName = "param.var." + param->getNameAsString(); auto *tempVar = spvBuilder.addFnVar(paramType, param->getLocation(), tempVarName, param->hasAttr(), param->hasAttr()); - + handleNodePayloadArrayType(param, tempVar); params.push_back(tempVar); // Create the stage input variable for parameter not marked as pure out and @@ -14017,6 +14524,9 @@ SpirvFunction *SpirvEmitter::emitEntryFunctionWrapper( if (!declIdMapper.createStageInputVar(param, &loadedValue, false)) return nullptr; + if (loadedValue) { + handleNodePayloadArrayType(param, loadedValue); + } // Only initialize the temporary variable if the parameter is indeed used, // or if it is an inout parameter. 
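// --- Editor's illustrative aside (not part of this patch) ---------------------
// A self-contained sketch of the attribute-to-execution-mode selection that the
// processNodeShaderAttributes hunk above performs. The structs here are plain
// stand-ins for the clang attributes; the real code also always emits LocalSize
// and ShaderIndexAMDX, which are omitted here for brevity.
#include <cstdio>
#include <optional>
#include <string>
#include <vector>

struct DispatchGrid { unsigned x, y, z; };

struct NodeAttrs {
  std::string launchType;                 // "broadcasting", "coalescing", "thread"
  std::optional<DispatchGrid> staticGrid; // [NodeDispatchGrid(x, y, z)]
  std::optional<DispatchGrid> maxGrid;    // [NodeMaxDispatchGrid(x, y, z)]
};

// Coalescing and thread launches map to CoalescingAMDX; a static dispatch grid
// takes precedence over a maximum grid, matching the order of checks above.
std::vector<std::string> executionModesFor(const NodeAttrs &a) {
  std::vector<std::string> modes;
  if (a.launchType == "coalescing" || a.launchType == "thread")
    modes.push_back("CoalescingAMDX");
  if (a.staticGrid)
    modes.push_back("StaticNumWorkgroupsAMDX");
  else if (a.maxGrid)
    modes.push_back("MaxNumWorkgroupsAMDX");
  return modes;
}

int main() {
  NodeAttrs a{"broadcasting", DispatchGrid{4, 1, 1}, std::nullopt};
  for (const auto &m : executionModesFor(a))
    std::printf("%s\n", m.c_str());
}
// --- End editor's aside -------------------------------------------------------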
@@ -14101,6 +14611,10 @@ SpirvFunction *SpirvEmitter::emitEntryFunctionWrapper( if (spvContext.isHS()) doDecl(patchConstFunc); + if (spirvOptions.debugInfoRich && decl->hasBody()) { + spvContext.popDebugLexicalScope(*info); + } + return entryFunction; } @@ -14880,8 +15394,12 @@ SpirvEmitter::createSpirvIntrInstExt(llvm::ArrayRef attrs, SpirvInstruction *SpirvEmitter::invertYIfRequested(SpirvInstruction *position, SourceLocation loc, SourceRange range) { - // Negate SV_Position.y if requested - if (spirvOptions.invertY) { + // Negate SV_Position.y if requested and supported + + bool supportsInvertY = spvContext.isVS() || spvContext.isGS() || + spvContext.isDS() || spvContext.isMS(); + + if (spirvOptions.invertY && supportsInvertY) { const auto oldY = spvBuilder.createCompositeExtract( astContext.FloatTy, position, {1}, loc, range); const auto newY = spvBuilder.createUnaryOp( @@ -15120,8 +15638,7 @@ SpirvEmitter::processCooperativeMatrixGetLength(const CallExpr *call) { } SpirvInstruction * -SpirvEmitter::processIntrinsicExecutionMode(const CallExpr *expr, - bool useIdParams) { +SpirvEmitter::processIntrinsicExecutionMode(const CallExpr *expr) { llvm::SmallVector execModesParams; uint32_t exeMode = 0; const auto args = expr->getArgs(); @@ -15145,9 +15662,38 @@ SpirvEmitter::processIntrinsicExecutionMode(const CallExpr *expr, assert(entryFunction != nullptr); assert(exeMode != 0); - return spvBuilder.addExecutionMode( - entryFunction, static_cast(exeMode), execModesParams, - expr->getExprLoc(), useIdParams); + return spvBuilder.addExecutionMode(entryFunction, + static_cast(exeMode), + execModesParams, expr->getExprLoc()); +} + +SpirvInstruction * +SpirvEmitter::processIntrinsicExecutionModeId(const CallExpr *expr) { + assert(expr->getNumArgs() > 0); + uint32_t exeMode = 0; + const Expr *modeExpr = expr->getArg(0); + Expr::EvalResult evalResult; + if (modeExpr->EvaluateAsRValue(evalResult, astContext) && + !evalResult.HasSideEffects && evalResult.Val.isInt()) { + exeMode = evalResult.Val.getInt().getZExtValue(); + } else { + emitError("The execution mode must be constant integer", + expr->getExprLoc()); + return nullptr; + } + + llvm::SmallVector execModesParams; + const auto args = expr->getArgs(); + for (uint32_t i = 1; i < expr->getNumArgs(); ++i) { + const Expr *argExpr = args[i]; + SpirvInstruction *argInst = doExpr(argExpr); + execModesParams.push_back(argInst); + } + + assert(entryFunction != nullptr); + return spvBuilder.addExecutionModeId(entryFunction, + static_cast(exeMode), + execModesParams, expr->getExprLoc()); } SpirvInstruction * @@ -15215,11 +15761,33 @@ bool SpirvEmitter::spirvToolsValidate(std::vector *mod, return tools.Validate(mod->data(), mod->size(), options); } +static bool canUseDerivativeGroupExecutionMode(SpirvContext::ShaderModelKind sm, + bool usingEXTMeshShader) { + switch (sm) { + case SpirvContext::ShaderModelKind::Compute: + case SpirvContext::ShaderModelKind::Node: + return true; + + // The KHR extension that allows derivative instruction in mesh and task + // (amplification) shader does not work with SPV_NV_mesh_shader extesion. 
+ case SpirvContext::ShaderModelKind::Mesh: + case SpirvContext::ShaderModelKind::Amplification: + return usingEXTMeshShader; + default: + return false; + } +} + void SpirvEmitter::addDerivativeGroupExecutionMode() { - assert(spvContext.isCS()); + bool usingEXTMeshShader = + featureManager.isExtensionEnabled(Extension::EXT_mesh_shader); + SpirvContext::ShaderModelKind sm = spvContext.getCurrentShaderModelKind(); + if (!canUseDerivativeGroupExecutionMode(sm, usingEXTMeshShader)) + return; - SpirvExecutionMode *numThreadsEm = spvBuilder.getModule()->findExecutionMode( - entryFunction, spv::ExecutionMode::LocalSize); + SpirvExecutionMode *numThreadsEm = + cast(spvBuilder.getModule()->findExecutionMode( + entryFunction, spv::ExecutionMode::LocalSize)); auto numThreads = numThreadsEm->getParams(); // The layout of the quad is determined by the numer of threads in each diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.h b/tools/clang/lib/SPIRV/SpirvEmitter.h index 79d2c43c35..ada8db3068 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.h +++ b/tools/clang/lib/SPIRV/SpirvEmitter.h @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// // // This file defines a SPIR-V emitter class that takes in HLSL AST and emits @@ -83,6 +80,9 @@ class SpirvEmitter : public ASTConsumer { void doDecl(const Decl *decl); void doStmt(const Stmt *stmt, llvm::ArrayRef attrs = {}); SpirvInstruction *doExpr(const Expr *expr, SourceRange rangeOverride = {}); + SpirvInstruction *doExprEnsuringRValue(const Expr *expr, + SourceLocation location, + SourceRange range); /// Processes the given expression and emits SPIR-V instructions. If the /// result is a GLValue, does an additional load. @@ -126,6 +126,8 @@ class SpirvEmitter : public ASTConsumer { SourceRange range = {}); private: + bool handleNodePayloadArrayType(const ParmVarDecl *decl, + SpirvInstruction *instr); void doFunctionDecl(const FunctionDecl *decl); void doVarDecl(const VarDecl *decl); void doRecordDecl(const RecordDecl *decl); @@ -176,7 +178,8 @@ class SpirvEmitter : public ASTConsumer { /// Overload with pre computed SpirvEvalInfo. /// /// The given expr will not be evaluated again. - SpirvInstruction *loadIfGLValue(const Expr *expr, SpirvInstruction *info); + SpirvInstruction *loadIfGLValue(const Expr *expr, SpirvInstruction *info, + SourceRange rangeOverride = {}); /// Loads the pointer of the aliased-to-variable if the given expression is a /// DeclRefExpr referencing an alias variable. See DeclResultIdMapper for @@ -225,6 +228,8 @@ class SpirvEmitter : public ASTConsumer { QualType lhsValType, SourceLocation loc, SourceRange range = {}); + bool canUseOpCopyLogical(QualType type) const; + /// Decomposes and reconstructs the given srcVal of the given valType to meet /// the requirements of the dstLR layout rule. SpirvInstruction *reconstructValue(SpirvInstruction *srcVal, QualType valType, @@ -504,6 +509,9 @@ class SpirvEmitter : public ASTConsumer { SpirvInstruction * processIntrinsicGetBufferContents(const CXXMemberCallExpr *); + /// Processes the 'Barrier' intrinsic function. 
+ SpirvInstruction *processIntrinsicBarrier(const CallExpr *); + /// Processes the 'GroupMemoryBarrier', 'GroupMemoryBarrierWithGroupSync', /// 'DeviceMemoryBarrier', 'DeviceMemoryBarrierWithGroupSync', /// 'AllMemoryBarrier', and 'AllMemoryBarrierWithGroupSync' intrinsic @@ -512,6 +520,40 @@ class SpirvEmitter : public ASTConsumer { bool isDevice, bool groupSync, bool isAllBarrier); + /// Processes the 'GetRemainingRecursionLevels' intrinsic function. + SpirvInstruction * + processIntrinsicGetRemainingRecursionLevels(const CallExpr *callExpr); + + /// Processes the 'IsValid' intrinsic function. + SpirvInstruction *processIntrinsicIsValid(const CXXMemberCallExpr *callExpr); + + /// Processes the 'Get' intrinsic function for (arrays of) node records and + /// the array subscript operator for node record arrays. + SpirvInstruction * + processIntrinsicExtractRecordStruct(const CXXMemberCallExpr *callExpr); + + /// Processes the 'GetGroupNodeOutputRecords' and 'GetThreadNodeOutputRecords' + /// intrinsic functions. + SpirvInstruction * + processIntrinsicGetNodeOutputRecords(const CXXMemberCallExpr *callExpr, + bool isGroupShared); + + /// Processes the 'IncrementOutputCount' intrinsic function. + SpirvInstruction * + processIntrinsicIncrementOutputCount(const CXXMemberCallExpr *callExpr, + bool isGroupShared); + + /// Processes the 'Count' intrinsic function for node input record arrays. + SpirvInstruction * + processIntrinsicGetRecordCount(const CXXMemberCallExpr *callExpr); + + /// Processes the 'OutputComplete' intrinsic function. + void processIntrinsicOutputComplete(const CXXMemberCallExpr *callExpr); + + /// Processes the 'FinishedCrossGroupSharing' intrinsic function. + SpirvInstruction * + processIntrinsicFinishedCrossGroupSharing(const CXXMemberCallExpr *callExpr); + /// Processes the 'mad' intrinsic function. SpirvInstruction *processIntrinsicMad(const CallExpr *); @@ -781,13 +823,29 @@ class SpirvEmitter : public ASTConsumer { SpirvInstruction *processCooperativeMatrixGetLength(const CallExpr *call); /// Process vk::ext_execution_mode intrinsic - SpirvInstruction *processIntrinsicExecutionMode(const CallExpr *expr, - bool useIdParams); + SpirvInstruction *processIntrinsicExecutionMode(const CallExpr *expr); + /// Process vk::ext_execution_mode_id intrinsic + SpirvInstruction *processIntrinsicExecutionModeId(const CallExpr *expr); /// Processes the 'firstbit{high|low}' intrinsic functions. SpirvInstruction *processIntrinsicFirstbit(const CallExpr *, GLSLstd450 glslOpcode); + SpirvInstruction * + processMatrixDerivativeIntrinsic(hlsl::IntrinsicOp hlslOpcode, + const Expr *arg, SourceLocation loc, + SourceRange range); + + SpirvInstruction *processDerivativeIntrinsic(hlsl::IntrinsicOp hlslOpcode, + const Expr *arg, + SourceLocation loc, + SourceRange range); + + SpirvInstruction *processDerivativeIntrinsic(hlsl::IntrinsicOp hlslOpcode, + SpirvInstruction *arg, + SourceLocation loc, + SourceRange range); + private: /// Returns the for constant value 0 of the given type. SpirvConstant *getValueZero(QualType type); @@ -833,6 +891,7 @@ class SpirvEmitter : public ASTConsumer { static hlsl::ShaderModel::Kind getShaderModelKind(StringRef stageName); static spv::ExecutionModel getSpirvShaderStage(hlsl::ShaderModel::Kind smk, bool); + void checkForWaveSizeAttr(const FunctionDecl *decl); /// \brief Handle inline SPIR-V attributes for the entry function. 
void processInlineSpirvAttributes(const FunctionDecl *entryFunction); @@ -859,6 +918,10 @@ class SpirvEmitter : public ASTConsumer { /// HLSL attributes of the entry point function. void processComputeShaderAttributes(const FunctionDecl *entryFunction); + /// \brief Adds necessary execution modes for the node shader based on the + /// HLSL attributes of the entry point function. + void processNodeShaderAttributes(const FunctionDecl *entryFunction); + /// \brief Adds necessary execution modes for the mesh/amplification shader /// based on the HLSL attributes of the entry point function. bool @@ -895,6 +958,7 @@ class SpirvEmitter : public ASTConsumer { /// The wrapper function is also responsible for initializing global static /// variables for some cases. bool emitEntryFunctionWrapperForRayTracing(const FunctionDecl *entryFunction, + RichDebugInfo **info, SpirvDebugFunction *debugFunction, SpirvFunction *entryFuncId); diff --git a/tools/clang/lib/SPIRV/SpirvInstruction.cpp b/tools/clang/lib/SPIRV/SpirvInstruction.cpp index f41de03adc..88d669d397 100644 --- a/tools/clang/lib/SPIRV/SpirvInstruction.cpp +++ b/tools/clang/lib/SPIRV/SpirvInstruction.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// // // This file implements the in-memory representation of SPIR-V instructions. @@ -33,7 +30,9 @@ DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvExtension) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvExtInstImport) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvMemoryModel) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvEntryPoint) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvExecutionModeBase) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvExecutionMode) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvExecutionModeId) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvString) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvSource) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvModuleProcessed) @@ -53,6 +52,11 @@ DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvUnreachable) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvAccessChain) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvAtomic) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvBarrier) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvIsNodePayloadValid) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvNodePayloadArrayLength) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvAllocateNodePayloads) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvEnqueueNodePayloads) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvFinishWritingNodePayload) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvBinaryOp) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvBitFieldExtract) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvBitFieldInsert) @@ -60,6 +64,7 @@ DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantBoolean) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantInteger) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantFloat) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantComposite) +DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantString) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConstantNull) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertPtrToU) DEFINE_INVOKE_VISITOR_FOR_CLASS(SpirvConvertUToPtr) @@ -207,13 +212,16 @@ SpirvEntryPoint::SpirvEntryPoint(SourceLocation loc, // OpExecutionMode and OpExecutionModeId instructions SpirvExecutionMode::SpirvExecutionMode(SourceLocation loc, SpirvFunction *entry, spv::ExecutionMode em, - llvm::ArrayRef paramsVec, - bool usesIdParams) - : SpirvInstruction(IK_ExecutionMode, - usesIdParams ? 
spv::Op::OpExecutionModeId - : spv::Op::OpExecutionMode, - QualType(), loc), - entryPoint(entry), execMode(em), + llvm::ArrayRef paramsVec) + : SpirvExecutionModeBase(IK_ExecutionMode, spv::Op::OpExecutionMode, loc, + entry, em), + params(paramsVec.begin(), paramsVec.end()) {} + +SpirvExecutionModeId::SpirvExecutionModeId( + SourceLocation loc, SpirvFunction *entry, spv::ExecutionMode em, + llvm::ArrayRef paramsVec) + : SpirvExecutionModeBase(IK_ExecutionModeId, spv::Op::OpExecutionModeId, + loc, entry, em), params(paramsVec.begin(), paramsVec.end()) {} SpirvString::SpirvString(SourceLocation loc, llvm::StringRef stringLiteral) @@ -467,6 +475,41 @@ SpirvBarrier::SpirvBarrier(SourceLocation loc, spv::Scope memScope, memoryScope(memScope), memorySemantics(memSemantics), executionScope(execScope) {} +SpirvIsNodePayloadValid::SpirvIsNodePayloadValid(QualType resultType, + SourceLocation loc, + SpirvInstruction *payloadArray, + SpirvInstruction *nodeIndex) + : SpirvInstruction(IK_IsNodePayloadValid, spv::Op::OpIsNodePayloadValidAMDX, + resultType, loc), + payloadArray(payloadArray), nodeIndex(nodeIndex) {} + +SpirvNodePayloadArrayLength::SpirvNodePayloadArrayLength( + QualType resultType, SourceLocation loc, SpirvInstruction *payloadArray) + : SpirvInstruction(IK_NodePayloadArrayLength, + spv::Op::OpNodePayloadArrayLengthAMDX, resultType, loc), + payloadArray(payloadArray) {} + +SpirvAllocateNodePayloads::SpirvAllocateNodePayloads( + QualType resultType, SourceLocation loc, spv::Scope allocationScope, + SpirvInstruction *shaderIndex, SpirvInstruction *recordCount) + : SpirvInstruction(IK_AllocateNodePayloads, + spv::Op::OpAllocateNodePayloadsAMDX, resultType, loc), + allocationScope(allocationScope), shaderIndex(shaderIndex), + recordCount(recordCount) {} + +SpirvEnqueueNodePayloads::SpirvEnqueueNodePayloads(SourceLocation loc, + SpirvInstruction *payload) + : SpirvInstruction(IK_EnqueueNodePayloads, + spv::Op::OpEnqueueNodePayloadsAMDX, QualType(), loc), + payload(payload) {} + +SpirvFinishWritingNodePayload::SpirvFinishWritingNodePayload( + QualType resultType, SourceLocation loc, SpirvInstruction *payload) + : SpirvInstruction(IK_FinishWritingNodePayload, + spv::Op::OpFinishWritingNodePayloadAMDX, resultType, + loc), + payload(payload) {} + SpirvBinaryOp::SpirvBinaryOp(spv::Op opcode, QualType resultType, SourceLocation loc, SpirvInstruction *op1, SpirvInstruction *op2, SourceRange range) @@ -563,7 +606,8 @@ bool SpirvConstant::isSpecConstant() const { return opcode == spv::Op::OpSpecConstant || opcode == spv::Op::OpSpecConstantTrue || opcode == spv::Op::OpSpecConstantFalse || - opcode == spv::Op::OpSpecConstantComposite; + opcode == spv::Op::OpSpecConstantComposite || + opcode == spv::Op::OpSpecConstantStringAMDX; } SpirvConstantBoolean::SpirvConstantBoolean(QualType type, bool val, @@ -618,6 +662,19 @@ SpirvConstantComposite::SpirvConstantComposite( type), constituents(constituentsVec.begin(), constituentsVec.end()) {} +SpirvConstantString::SpirvConstantString(llvm::StringRef stringLiteral, + bool isSpecConst) + : SpirvConstant(IK_ConstantString, + isSpecConst ? 
spv::Op::OpSpecConstantStringAMDX + : spv::Op::OpConstantStringAMDX, + QualType()), + str(stringLiteral) {} + +bool SpirvConstantString::operator==(const SpirvConstantString &that) const { + return opcode == that.opcode && resultType == that.resultType && + str == that.str; +} + SpirvConstantNull::SpirvConstantNull(QualType type) : SpirvConstant(IK_ConstantNull, spv::Op::OpConstantNull, type) {} diff --git a/tools/clang/lib/SPIRV/SpirvModule.cpp b/tools/clang/lib/SPIRV/SpirvModule.cpp index 9c6a826a5b..ed6aca7488 100644 --- a/tools/clang/lib/SPIRV/SpirvModule.cpp +++ b/tools/clang/lib/SPIRV/SpirvModule.cpp @@ -294,9 +294,10 @@ void SpirvModule::addEntryPoint(SpirvEntryPoint *ep) { entryPoints.push_back(ep); } -SpirvExecutionMode *SpirvModule::findExecutionMode(SpirvFunction *entryPoint, - spv::ExecutionMode em) { - for (SpirvExecutionMode *cem : executionModes) { +SpirvExecutionModeBase * +SpirvModule::findExecutionMode(SpirvFunction *entryPoint, + spv::ExecutionMode em) { + for (SpirvExecutionModeBase *cem : executionModes) { if (cem->getEntryPoint() != entryPoint) continue; if (cem->getExecutionMode() != em) @@ -306,7 +307,7 @@ SpirvExecutionMode *SpirvModule::findExecutionMode(SpirvFunction *entryPoint, return nullptr; } -void SpirvModule::addExecutionMode(SpirvExecutionMode *em) { +void SpirvModule::addExecutionMode(SpirvExecutionModeBase *em) { assert(em && "cannot add null execution mode"); executionModes.push_back(em); } diff --git a/tools/clang/lib/SPIRV/SpirvType.cpp b/tools/clang/lib/SPIRV/SpirvType.cpp index cabeba4cda..286e6224a4 100644 --- a/tools/clang/lib/SPIRV/SpirvType.cpp +++ b/tools/clang/lib/SPIRV/SpirvType.cpp @@ -167,6 +167,10 @@ bool RuntimeArrayType::operator==(const RuntimeArrayType &that) const { (!stride.hasValue() || stride.getValue() == that.stride.getValue()); } +bool NodePayloadArrayType::operator==(const NodePayloadArrayType &that) const { + return elementType == that.elementType && nodeDecl == that.nodeDecl; +} + bool SpvIntrinsicTypeOperand::operator==( const SpvIntrinsicTypeOperand &that) const { if (isTypeOperand != that.isTypeOperand) diff --git a/tools/clang/lib/Sema/SemaCast.cpp b/tools/clang/lib/Sema/SemaCast.cpp index f5a864e2b6..dcff6c2461 100644 --- a/tools/clang/lib/Sema/SemaCast.cpp +++ b/tools/clang/lib/Sema/SemaCast.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. 
-// //===----------------------------------------------------------------------===// // // This file implements semantic analysis for cast expressions, including diff --git a/tools/clang/lib/Sema/SemaChecking.cpp b/tools/clang/lib/Sema/SemaChecking.cpp index 9e64732336..e3932220f9 100644 --- a/tools/clang/lib/Sema/SemaChecking.cpp +++ b/tools/clang/lib/Sema/SemaChecking.cpp @@ -1426,7 +1426,7 @@ bool Sema::CheckFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, CheckMemaccessArguments(TheCall, CMId, FnInfo); #endif // HLSL Change Ends - CheckHLSLFunctionCall(FDecl, TheCall, Proto); // HLSL Change + CheckHLSLFunctionCall(FDecl, TheCall); // HLSL Change return false; } diff --git a/tools/clang/lib/Sema/SemaCodeComplete.cpp b/tools/clang/lib/Sema/SemaCodeComplete.cpp index b1b4668ba3..84d0990346 100644 --- a/tools/clang/lib/Sema/SemaCodeComplete.cpp +++ b/tools/clang/lib/Sema/SemaCodeComplete.cpp @@ -4020,7 +4020,7 @@ void Sema::CodeCompleteCall(Scope *S, Expr *Fn, ArrayRef Args) { Expr *NakedFn = Fn->IgnoreParenCasts(); if (auto ULE = dyn_cast(NakedFn)) - AddOverloadedCallCandidates(ULE, Args, CandidateSet, + AddOverloadedCallCandidates(ULE, Args, CandidateSet, S, // HLSL Change /*PartialOverloading=*/true); else if (auto UME = dyn_cast(NakedFn)) { TemplateArgumentListInfo TemplateArgsBuffer, *TemplateArgs = nullptr; diff --git a/tools/clang/lib/Sema/SemaDXR.cpp b/tools/clang/lib/Sema/SemaDXR.cpp index 36ab55ea10..0ccb21fb2b 100644 --- a/tools/clang/lib/Sema/SemaDXR.cpp +++ b/tools/clang/lib/Sema/SemaDXR.cpp @@ -28,6 +28,7 @@ #include "dxc/DXIL/DxilConstants.h" #include "dxc/DXIL/DxilShaderModel.h" +#include "dxc/HlslIntrinsicOp.h" using namespace clang; using namespace sema; @@ -49,9 +50,9 @@ struct PayloadUse { const MemberExpr *Member = nullptr; }; -struct TraceRayCall { - TraceRayCall() = default; - TraceRayCall(const CallExpr *Call, const CFGBlock *Parent) +struct PayloadBuiltinCall { + PayloadBuiltinCall() = default; + PayloadBuiltinCall(const CallExpr *Call, const CFGBlock *Parent) : Call(Call), Parent(Parent) {} const CallExpr *Call = nullptr; const CFGBlock *Parent = nullptr; @@ -71,7 +72,7 @@ struct DxrShaderDiagnoseInfo { const FunctionDecl *funcDecl; const VarDecl *Payload; DXIL::PayloadAccessShaderStage Stage; - std::vector TraceCalls; + std::vector PayloadBuiltinCalls; std::map> WritesPerField; std::map> ReadsPerField; std::vector PayloadAsCallArg; @@ -121,24 +122,42 @@ GetPayloadQualifierForStage(FieldDecl *Field, return DXIL::PayloadAccessQualifier::NoAccess; } -// Returns the declaration of the payload used in a TraceRay call -const VarDecl *GetPayloadParameterForTraceCall(const CallExpr *Trace) { - const Decl *callee = Trace->getCalleeDecl(); - if (!callee) +static int GetPayloadParamIdxForIntrinsic(const FunctionDecl *FD) { + HLSLIntrinsicAttr *IntrinAttr = FD->getAttr(); + if (!IntrinAttr) + return -1; + switch ((IntrinsicOp)IntrinAttr->getOpcode()) { + default: + return -1; + case IntrinsicOp::IOP_TraceRay: + case IntrinsicOp::MOP_DxHitObject_TraceRay: + case IntrinsicOp::MOP_DxHitObject_Invoke: + return FD->getNumParams() - 1; + } +} + +static bool IsBuiltinWithPayload(const FunctionDecl *FD) { + return GetPayloadParamIdxForIntrinsic(FD) >= 0; +} + +// Returns the declaration of the payload used in a call to TraceRay, +// HitObject::TraceRay or HitObject::Invoke. 
+const VarDecl *GetPayloadParameterForBuiltinCall(const CallExpr *Call) { + const Decl *Callee = Call->getCalleeDecl(); + if (!Callee) return nullptr; - if (!isa(callee)) + if (!isa(Callee)) return nullptr; - const FunctionDecl *FD = cast(callee); + int PldParamIdx = GetPayloadParamIdxForIntrinsic(cast(Callee)); + if (PldParamIdx < 0) + return nullptr; - if (FD->isImplicit() && FD->getName() == "TraceRay") { - const Stmt *Param = IgnoreParensAndDecay(Trace->getArg(7)); - if (const DeclRefExpr *ParamRef = dyn_cast(Param)) { - if (const VarDecl *Decl = dyn_cast(ParamRef->getDecl())) - return Decl; - } - } + const Stmt *Param = IgnoreParensAndDecay(Call->getArg(PldParamIdx)); + if (const DeclRefExpr *ParamRef = dyn_cast(Param)) + if (const VarDecl *Decl = dyn_cast(ParamRef->getDecl())) + return Decl; return nullptr; } @@ -190,12 +209,9 @@ void CollectReadsWritesAndCallsForPayload(const Stmt *S, } } -// Collects all TraceRay calls. -void CollectTraceRayCalls(const Stmt *S, DxrShaderDiagnoseInfo &Info, - const CFGBlock *Block) { - // TraceRay has void as return type so it should never be something else - // than a plain CallExpr. - +// Collects all calls to TraceRay, HitObject::TraceRay and HitObject::Invoke. +void CollectBuiltinCallsWithPayload(const Stmt *S, DxrShaderDiagnoseInfo &Info, + const CFGBlock *Block) { if (const CallExpr *Call = dyn_cast(S)) { const Decl *Callee = Call->getCalleeDecl(); @@ -204,11 +220,8 @@ void CollectTraceRayCalls(const Stmt *S, DxrShaderDiagnoseInfo &Info, const FunctionDecl *CalledFunction = cast(Callee); - // Ignore trace calls here. - if (CalledFunction->isImplicit() && - CalledFunction->getName() == "TraceRay") { - Info.TraceCalls.push_back({Call, Block}); - } + if (IsBuiltinWithPayload(CalledFunction)) + Info.PayloadBuiltinCalls.push_back({Call, Block}); } } @@ -528,13 +541,14 @@ void TraverseCFG(const CFGBlock &Block, Action PerElementAction, } } -// Forward traverse the CFG and collect calls to TraceRay. -void ForwardTraverseCFGAndCollectTraceCalls( +// Forward traverse the CFG and collect calls to TraceRay, HitObject::TraceRay +// and HitObject::Invoke. +void ForwardTraverseCFGAndCollectBuiltinCallsWithPayload( const CFGBlock &Block, DxrShaderDiagnoseInfo &Info, std::set &Visited) { auto Action = [&Info](const CFGBlock &Block, const CFGElement &Element) { if (Optional S = Element.getAs()) { - CollectTraceRayCalls(S->getStmt(), Info, &Block); + CollectBuiltinCallsWithPayload(S->getStmt(), Info, &Block); } }; @@ -664,9 +678,9 @@ DiagnosePayloadAsFunctionArg( const FunctionDecl *CalledFunction = cast(Callee); // Ignore trace calls here. - if (CalledFunction->isImplicit() && - CalledFunction->getName() == "TraceRay") { - Info.TraceCalls.push_back(TraceRayCall{Call, Use.Parent}); + if (IsBuiltinWithPayload(CalledFunction)) { + Info.PayloadBuiltinCalls.push_back( + PayloadBuiltinCall{Call, Use.Parent}); continue; } @@ -789,10 +803,12 @@ void HandlePayloadInitializer(DxrShaderDiagnoseInfo &Info) { } } -// Emit diagnostics for a TraceRay call. -void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, - const TraceRayCall &Trace, DominatorTree &DT) { - // For each TraceRay call check if write(caller) fields are written. +// Emit diagnostics for this call to either TraceRay, HitObject::TraceRay or +// HitObject::Invoke. +void DiagnoseBuiltinCallWithPayload(Sema &S, const VarDecl *Payload, + const PayloadBuiltinCall &PldCall, + DominatorTree &DT) { + // For each call check if write(caller) fields are written. 
const DXIL::PayloadAccessShaderStage CallerStage = DXIL::PayloadAccessShaderStage::Caller; @@ -810,12 +826,17 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, return; } - if (ContainsLongVector(Payload->getType())) { - const unsigned PayloadParametersIdx = 10; - S.Diag(Payload->getLocation(), diag::err_hlsl_unsupported_long_vector) - << PayloadParametersIdx; + // Verify that the payload type is legal + if (!hlsl::IsHLSLCopyableAnnotatableRecord(Payload->getType())) + S.Diag(Payload->getLocation(), diag::err_payload_attrs_must_be_udt) + << /*payload|attributes|callable*/ 0 << /*parameter %2|type*/ 0 + << Payload; + + // This will produce more details, but also catch disallowed long vectors + const TypeDiagContext DiagContext = TypeDiagContext::PayloadParameters; + if (DiagnoseTypeElements(S, Payload->getLocation(), Payload->getType(), + DiagContext, DiagContext)) return; - } CollectNonAccessableFields(PayloadType, CallerStage, {}, {}, NonWriteableFields, NonReadableFields); @@ -832,12 +853,12 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, std::set Visited; - const CFGBlock *Parent = Trace.Parent; + const CFGBlock *Parent = PldCall.Parent; Visited.insert(Parent); - // Collect payload accesses in the same block until we reach the TraceRay call + // Collect payload accesses in the same block until we reach the call for (auto Element : *Parent) { if (Optional S = Element.getAs()) { - if (S->getStmt() == Trace.Call) + if (S->getStmt() == PldCall.Call) break; CollectReadsWritesAndCallsForPayload(S->getStmt(), TraceInfo, Parent); } @@ -850,10 +871,12 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, BackwardTraverseCFGAndCollectReadsWrites(*Pred, TraceInfo, Visited); } + int PldArgIdx = PldCall.Call->getNumArgs() - 1; + // Warn if a writeable field has not been written. for (const FieldDecl *Field : WriteableFields) { if (!TraceInfo.WritesPerField.count(Field)) { - S.Diag(Trace.Call->getArg(7)->getExprLoc(), + S.Diag(PldCall.Call->getArg(PldArgIdx)->getExprLoc(), diag::warn_hlsl_payload_access_no_write_for_trace_payload) << Field->getName(); } @@ -862,7 +885,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, for (const FieldDecl *Field : NonWriteableFields) { if (TraceInfo.WritesPerField.count(Field)) { S.Diag( - Trace.Call->getArg(7)->getExprLoc(), + PldCall.Call->getArg(PldArgIdx)->getExprLoc(), diag::warn_hlsl_payload_access_write_but_no_write_for_trace_payload) << Field->getName(); } @@ -878,7 +901,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, bool CallFound = false; for (auto Element : *Parent) { // TODO: reverse iterate? if (Optional S = Element.getAs()) { - if (S->getStmt() == Trace.Call) { + if (S->getStmt() == PldCall.Call) { CallFound = true; continue; } @@ -895,7 +918,7 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, for (const FieldDecl *Field : ReadableFields) { if (!TraceInfo.ReadsPerField.count(Field)) { - S.Diag(Trace.Call->getArg(7)->getExprLoc(), + S.Diag(PldCall.Call->getArg(PldArgIdx)->getExprLoc(), diag::warn_hlsl_payload_access_read_but_no_read_after_trace) << Field->getName(); } @@ -928,27 +951,29 @@ void DiagnoseTraceCall(Sema &S, const VarDecl *Payload, } } -// Emit diagnostics for all TraceRay calls. -void DiagnoseTraceCalls(Sema &S, CFG &ShaderCFG, DominatorTree &DT, - DxrShaderDiagnoseInfo &Info) { - // Collect TraceRay calls in the shader. +// Emit diagnostics for all calls to TraceRay, HitObject::TraceRay or +// HitObject::Invoke. 
+void DiagnoseBuiltinCallsWithPayload(Sema &S, CFG &ShaderCFG, DominatorTree &DT, + DxrShaderDiagnoseInfo &Info) { + // Collect calls with payload in the shader. std::set Visited; - ForwardTraverseCFGAndCollectTraceCalls(ShaderCFG.getEntry(), Info, Visited); + ForwardTraverseCFGAndCollectBuiltinCallsWithPayload(ShaderCFG.getEntry(), + Info, Visited); std::set Diagnosed; - for (const TraceRayCall &TraceCall : Info.TraceCalls) { - if (Diagnosed.count(TraceCall.Call)) + for (const PayloadBuiltinCall &PldCall : Info.PayloadBuiltinCalls) { + if (Diagnosed.count(PldCall.Call)) continue; - Diagnosed.insert(TraceCall.Call); + Diagnosed.insert(PldCall.Call); - const VarDecl *Payload = GetPayloadParameterForTraceCall(TraceCall.Call); - DiagnoseTraceCall(S, Payload, TraceCall, DT); + const VarDecl *Payload = GetPayloadParameterForBuiltinCall(PldCall.Call); + DiagnoseBuiltinCallWithPayload(S, Payload, PldCall, DT); } } // Emit diagnostics for all access to the payload of a shader, -// and the input to TraceRay calls. +// and the input to TraceRay, HitObject::TraceRay or HitObject::Invoke calls. std::vector DiagnosePayloadAccess(Sema &S, DxrShaderDiagnoseInfo &Info, const std::set &FieldsToIgnoreRead, @@ -1012,7 +1037,7 @@ DiagnosePayloadAccess(Sema &S, DxrShaderDiagnoseInfo &Info, DiagnosePayloadReads(S, TheCFG, DT, Info, NonReadableFields); } - DiagnoseTraceCalls(S, TheCFG, DT, Info); + DiagnoseBuiltinCallsWithPayload(S, TheCFG, DT, Info); return WrittenFields; } @@ -1165,9 +1190,13 @@ void DiagnoseCallableEntry(Sema &S, FunctionDecl *FD, << /*payload|callable*/ 1 << Param; QualType Ty = Param->getType().getNonReferenceType(); - if (!(hlsl::IsHLSLCopyableAnnotatableRecord(Ty))) + // Don't diagnose incomplete type here. Function parameters are + // checked in Sema::CheckParmsForFunctionDef. + if (!S.RequireCompleteType(Param->getLocation(), Ty, 0) && + !(hlsl::IsHLSLCopyableAnnotatableRecord(Ty))) S.Diag(Param->getLocation(), diag::err_payload_attrs_must_be_udt) - << /*payload|attributes|callable*/ 2 << Param; + << /*payload|attributes|callable*/ 2 << /*parameter %2|type*/ 0 + << Param; } return; } @@ -1206,9 +1235,15 @@ void DiagnoseMissOrAnyHitEntry(Sema &S, FunctionDecl *FD, QualType Ty = Param->getType().getNonReferenceType(); + // Don't diagnose here, just continue if this fails. Function parameters are + // checked in Sema::CheckParmsForFunctionDef. + if (S.RequireCompleteType(Param->getLocation(), Ty, 0)) + continue; + if (!(hlsl::IsHLSLCopyableAnnotatableRecord(Ty))) { S.Diag(Param->getLocation(), diag::err_payload_attrs_must_be_udt) - << /*payload|attributes|callable*/ Idx << Param; + << /*payload|attributes|callable*/ Idx << /*parameter %2|type*/ 0 + << Param; } } return; @@ -1259,9 +1294,15 @@ void DiagnoseClosestHitEntry(Sema &S, FunctionDecl *FD, QualType Ty = Param->getType().getNonReferenceType(); + // Don't diagnose here, just continue if this fails. Function parameters are + // checked in Sema::CheckParmsForFunctionDef. 
+ if (S.RequireCompleteType(Param->getLocation(), Ty, 0)) + continue; + if (!(hlsl::IsHLSLCopyableAnnotatableRecord(Ty))) { S.Diag(Param->getLocation(), diag::err_payload_attrs_must_be_udt) - << /*payload|attributes|callable*/ Idx << Param; + << /*payload|attributes|callable*/ Idx << /*parameter %2|type*/ 0 + << Param; } } return; diff --git a/tools/clang/lib/Sema/SemaDecl.cpp b/tools/clang/lib/Sema/SemaDecl.cpp index e09bf4623c..a772054960 100644 --- a/tools/clang/lib/Sema/SemaDecl.cpp +++ b/tools/clang/lib/Sema/SemaDecl.cpp @@ -5331,7 +5331,7 @@ bool Sema::inferObjCARCLifetime(ValueDecl *decl) { Qualifiers::ObjCLifetime lifetime = type.getObjCLifetime(); if (lifetime == Qualifiers::OCL_Autoreleasing) { // Various kinds of declaration aren't allowed to be __autoreleasing. - unsigned kind = -1U; + unsigned kind = ~0U; if (VarDecl *var = dyn_cast(decl)) { if (var->hasAttr()) kind = 0; // __block @@ -5343,7 +5343,7 @@ bool Sema::inferObjCARCLifetime(ValueDecl *decl) { kind = 2; // field } - if (kind != -1U) { + if (kind != ~0U) { Diag(decl->getLocation(), diag::err_arc_autoreleasing_var) << kind; } diff --git a/tools/clang/lib/Sema/SemaExpr.cpp b/tools/clang/lib/Sema/SemaExpr.cpp index 507b6a7508..cccf711126 100644 --- a/tools/clang/lib/Sema/SemaExpr.cpp +++ b/tools/clang/lib/Sema/SemaExpr.cpp @@ -1466,7 +1466,7 @@ Sema::CreateGenericSelectionExpr(SourceLocation KeyLoc, ContainsUnexpandedParameterPack); SmallVector CompatIndices; - unsigned DefaultIndex = -1U; + unsigned DefaultIndex = std::numeric_limits::max(); for (unsigned i = 0; i < NumAssocs; ++i) { if (!Types[i]) DefaultIndex = i; @@ -1498,7 +1498,8 @@ Sema::CreateGenericSelectionExpr(SourceLocation KeyLoc, // C11 6.5.1.1p2 "If a generic selection has no default generic association, // its controlling expression shall have type compatible with exactly one of // the types named in its generic association list." - if (DefaultIndex == -1U && CompatIndices.size() == 0) { + if (DefaultIndex == std::numeric_limits::max() && + CompatIndices.size() == 0) { // We strip parens here because the controlling expression is typically // parenthesized in macro definitions. ControllingExpr = ControllingExpr->IgnoreParens(); @@ -3504,12 +3505,14 @@ ExprResult Sema::ActOnNumericConstant(const Token &Tok, Scope *UDLScope) { Ty = Context.LitIntTy; if (Literal.GetIntegerValue(ResultVal)) { // If this value didn't fit into 64-bit literal int, report error. - Diag(Tok.getLocation(), diag::err_integer_literal_too_large); + Diag(Tok.getLocation(), diag::err_integer_literal_too_large) + << /* Unsigned */ 1; } } else { if (Literal.GetIntegerValue(ResultVal)) { - Diag(Tok.getLocation(), diag::err_integer_literal_too_large); + Diag(Tok.getLocation(), diag::err_integer_literal_too_large) + << /* Unsigned */ 1; } if (Literal.isLongLong) { if (Literal.isUnsigned) @@ -3798,13 +3801,21 @@ static void warnOnSizeofOnArrayDecay(Sema &S, SourceLocation Loc, QualType T, } // HLSL Change Begins -bool Sema::CheckHLSLUnaryExprOrTypeTraitOperand(QualType ExprType, SourceLocation Loc, +bool Sema::CheckHLSLUnaryExprOrTypeTraitOperand(QualType ExprType, + SourceLocation Loc, UnaryExprOrTypeTrait ExprKind) { assert(ExprKind == UnaryExprOrTypeTrait::UETT_SizeOf); - // "sizeof 42" is ill-defined because HLSL has literal int type which can decay to an int of any size. 
- const BuiltinType* BuiltinTy = ExprType->getAs(); - if (BuiltinTy != nullptr && (BuiltinTy->getKind() == BuiltinType::LitInt || BuiltinTy->getKind() == BuiltinType::LitFloat)) { + if (RequireCompleteType(Loc, ExprType, + diag::err_sizeof_alignof_incomplete_type, ExprKind, + ExprType)) + return true; + + // "sizeof 42" is ill-defined because HLSL has literal int type which can + // decay to an int of any size. + const BuiltinType *BuiltinTy = ExprType->getAs(); + if (BuiltinTy != nullptr && (BuiltinTy->getKind() == BuiltinType::LitInt || + BuiltinTy->getKind() == BuiltinType::LitFloat)) { Diag(Loc, diag::err_hlsl_sizeof_literal) << ExprType; return true; } @@ -5338,8 +5349,6 @@ Sema::BuildResolvedCallExpr(Expr *Fn, NamedDecl *NDecl, if (FDecl) { if (CheckFunctionCall(FDecl, TheCall, Proto)) return ExprError(); - if (CheckHLSLFunctionCall(FDecl, TheCall)) - return ExprError(); if (BuiltinID) return CheckBuiltinFunctionCall(FDecl, BuiltinID, TheCall); } else if (NDecl) { diff --git a/tools/clang/lib/Sema/SemaExprCXX.cpp b/tools/clang/lib/Sema/SemaExprCXX.cpp index 5113c56205..1e70b95476 100644 --- a/tools/clang/lib/Sema/SemaExprCXX.cpp +++ b/tools/clang/lib/Sema/SemaExprCXX.cpp @@ -5,9 +5,6 @@ // This file is distributed under the University of Illinois Open Source // License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// //===----------------------------------------------------------------------===// /// /// \file diff --git a/tools/clang/lib/Sema/SemaHLSL.cpp b/tools/clang/lib/Sema/SemaHLSL.cpp index 418425a468..656dfb401f 100644 --- a/tools/clang/lib/Sema/SemaHLSL.cpp +++ b/tools/clang/lib/Sema/SemaHLSL.cpp @@ -6,15 +6,13 @@ // This file is distributed under the University of Illinois Open Source // // License. See LICENSE.TXT for details. // // // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. // -// All rights reserved. // -// // // This file implements the semantic support for HLSL. 
// // // /////////////////////////////////////////////////////////////////////////////// #include "clang/Sema/SemaHLSL.h" #include "VkConstantsTables.h" +#include "dxc/DXIL/DxilConstants.h" #include "dxc/DXIL/DxilFunctionProps.h" #include "dxc/DXIL/DxilShaderModel.h" #include "dxc/DXIL/DxilUtil.h" @@ -46,6 +44,7 @@ #include "clang/Sema/TemplateDeduction.h" #include "llvm/ADT/DenseMap.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/ADT/SmallSet.h" #include "llvm/ADT/StringRef.h" #include "llvm/Support/ErrorHandling.h" #include "llvm/Support/raw_ostream.h" @@ -1138,6 +1137,14 @@ static const ArBasicKind g_RayDescCT[] = {AR_OBJECT_RAY_DESC, AR_BASIC_UNKNOWN}; static const ArBasicKind g_RayQueryCT[] = {AR_OBJECT_RAY_QUERY, AR_BASIC_UNKNOWN}; +static const ArBasicKind g_LinAlgCT[] = { + AR_BASIC_FLOAT32, AR_BASIC_FLOAT32_PARTIAL_PRECISION, + AR_BASIC_FLOAT16, AR_BASIC_INT32, + AR_BASIC_INT16, AR_BASIC_UINT32, + AR_BASIC_UINT16, AR_BASIC_INT8_4PACKED, + AR_BASIC_UINT8_4PACKED, AR_BASIC_NOCAST, + AR_BASIC_UNKNOWN}; + static const ArBasicKind g_AccelerationStructCT[] = { AR_OBJECT_ACCELERATION_STRUCT, AR_BASIC_UNKNOWN}; @@ -1301,6 +1308,7 @@ const ArBasicKind *g_LegalIntrinsicCompTypes[] = { g_ThreadNodeOutputRecordsCT, // LICOMPTYPE_THREAD_NODE_OUTPUT_RECORDS g_DxHitObjectCT, // LICOMPTYPE_HIT_OBJECT g_RayQueryCT, // LICOMPTYPE_RAY_QUERY + g_LinAlgCT, // LICOMPTYPE_LINALG #ifdef ENABLE_SPIRV_CODEGEN g_VKBufferPointerCT, // LICOMPTYPE_VK_BUFFER_POINTER #endif @@ -4144,6 +4152,7 @@ class HLSLExternalSource : public ExternalSemaSource { SourceLocation(), &context.Idents.get("dx"), /*PrevDecl*/ nullptr); m_dxNSDecl->setImplicit(); + m_dxNSDecl->setHasExternalLexicalStorage(true); context.getTranslationUnitDecl()->addDecl(m_dxNSDecl); #ifdef ENABLE_SPIRV_CODEGEN @@ -5161,7 +5170,7 @@ class HLSLExternalSource : public ExternalSemaSource { bool AddOverloadedCallCandidates(UnresolvedLookupExpr *ULE, ArrayRef Args, - OverloadCandidateSet &CandidateSet, + OverloadCandidateSet &CandidateSet, Scope *S, bool PartialOverloading) override { DXASSERT_NOMSG(ULE != nullptr); @@ -5186,6 +5195,8 @@ class HLSLExternalSource : public ExternalSemaSource { // Exceptions: // - Vulkan-specific intrinsics live in the 'vk::' namespace. // - DirectX-specific intrinsics live in the 'dx::' namespace. + // - Global namespaces could just mean we have a `using` declaration... so + // it can be anywhere! 
if (isQualified && !isGlobalNamespace && !isVkNamespace && !isDxNamespace) return false; @@ -5196,81 +5207,106 @@ class HLSLExternalSource : public ExternalSemaSource { } StringRef nameIdentifier = idInfo->getName(); - const HLSL_INTRINSIC *table = g_Intrinsics; - auto tableCount = _countof(g_Intrinsics); - if (isDxNamespace) { - table = g_DxIntrinsics; - tableCount = _countof(g_DxIntrinsics); + using IntrinsicArray = llvm::ArrayRef; + struct IntrinsicTableEntry { + IntrinsicArray Table; + NamespaceDecl *NS; + }; + + llvm::SmallVector SearchTables; + + bool SearchDX = isDxNamespace; + bool SearchVK = isVkNamespace; + if (isGlobalNamespace || !isQualified) + SearchTables.push_back( + IntrinsicTableEntry{IntrinsicArray(g_Intrinsics), m_hlslNSDecl}); + + if (S && !isQualified) { + SmallVector NSContexts; + m_sema->CollectNamespaceContexts(S, NSContexts); + for (const auto &UD : NSContexts) { + if (static_cast(m_dxNSDecl) == UD) + SearchDX = true; + else if (static_cast(m_vkNSDecl) == UD) + SearchVK = true; + } } + + if (SearchDX) + SearchTables.push_back( + IntrinsicTableEntry{IntrinsicArray(g_DxIntrinsics), m_dxNSDecl}); #ifdef ENABLE_SPIRV_CODEGEN - if (isVkNamespace) { - table = g_VkIntrinsics; - tableCount = _countof(g_VkIntrinsics); - } -#endif // ENABLE_SPIRV_CODEGEN + if (SearchVK) + SearchTables.push_back( + IntrinsicTableEntry{IntrinsicArray(g_VkIntrinsics), m_vkNSDecl}); +#endif - IntrinsicDefIter cursor = FindIntrinsicByNameAndArgCount( - table, tableCount, StringRef(), nameIdentifier, Args.size()); - IntrinsicDefIter end = IntrinsicDefIter::CreateEnd( - table, tableCount, IntrinsicTableDefIter::CreateEnd(m_intrinsicTables)); - - for (; cursor != end; ++cursor) { - // If this is the intrinsic we're interested in, build up a representation - // of the types we need. - const HLSL_INTRINSIC *pIntrinsic = *cursor; - LPCSTR tableName = cursor.GetTableName(); - LPCSTR lowering = cursor.GetLoweringStrategy(); - DXASSERT(pIntrinsic->uNumArgs <= g_MaxIntrinsicParamCount + 1, - "otherwise g_MaxIntrinsicParamCount needs to be updated for " - "wider signatures"); - - std::vector functionArgTypes; - size_t badArgIdx; - bool argsMatch = - MatchArguments(cursor, QualType(), QualType(), QualType(), Args, - &functionArgTypes, badArgIdx); - if (!functionArgTypes.size()) - return false; + assert(!SearchTables.empty() && "Must have at least one search table!"); + + for (const auto &T : SearchTables) { + + IntrinsicDefIter cursor = FindIntrinsicByNameAndArgCount( + T.Table.data(), T.Table.size(), StringRef(), nameIdentifier, + Args.size()); + IntrinsicDefIter end = IntrinsicDefIter::CreateEnd( + T.Table.data(), T.Table.size(), + IntrinsicTableDefIter::CreateEnd(m_intrinsicTables)); + + for (; cursor != end; ++cursor) { + // If this is the intrinsic we're interested in, build up a + // representation of the types we need. + const HLSL_INTRINSIC *pIntrinsic = *cursor; + LPCSTR tableName = cursor.GetTableName(); + LPCSTR lowering = cursor.GetLoweringStrategy(); + DXASSERT(pIntrinsic->uNumArgs <= g_MaxIntrinsicParamCount + 1, + "otherwise g_MaxIntrinsicParamCount needs to be updated for " + "wider signatures"); + + std::vector functionArgTypes; + size_t badArgIdx; + bool argsMatch = + MatchArguments(cursor, QualType(), QualType(), QualType(), Args, + &functionArgTypes, badArgIdx); + if (!functionArgTypes.size()) + return false; - // Get or create the overload we're interested in. 
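A simplified model of the table-selection logic introduced above: each namespace owns its own intrinsic table, and an unqualified call may have to search several tables when a using-directive makes dx:: or vk:: visible in the current scope. The names and data layout here are illustrative only.

#include <cassert>
#include <string>
#include <vector>

struct Intrinsic { std::string Name; };
using Table = std::vector<Intrinsic>;

// Search the candidate tables in order and return the first match, mirroring
// the SearchTables loop above (which additionally records the owning namespace).
const Intrinsic *FindIntrinsic(const std::vector<const Table *> &SearchTables,
                               const std::string &Name) {
  for (const Table *T : SearchTables)
    for (const Intrinsic &I : *T)
      if (I.Name == Name)
        return &I;
  return nullptr;
}

int main() {
  Table Global = {{"abs"}, {"dot"}};
  Table Dx = {{"MaybeReorderThread"}};
  // Unqualified lookup with 'using namespace dx;' in scope searches both tables.
  std::vector<const Table *> SearchTables = {&Global, &Dx};
  assert(FindIntrinsic(SearchTables, "MaybeReorderThread") != nullptr);
  // Without the using-directive only the global table is searched.
  assert(FindIntrinsic({&Global}, "MaybeReorderThread") == nullptr);
}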
- FunctionDecl *intrinsicFuncDecl = nullptr; - std::pair insertResult = - m_usedIntrinsics.insert(UsedIntrinsic(pIntrinsic, functionArgTypes)); - bool insertedNewValue = insertResult.second; - if (insertedNewValue) { - NamespaceDecl *nsDecl = m_hlslNSDecl; - if (isVkNamespace) - nsDecl = m_vkNSDecl; - else if (isDxNamespace) - nsDecl = m_dxNSDecl; - DXASSERT(tableName, - "otherwise IDxcIntrinsicTable::GetTableName() failed"); - intrinsicFuncDecl = - AddHLSLIntrinsicFunction(*m_context, nsDecl, tableName, lowering, - pIntrinsic, &functionArgTypes); - insertResult.first->setFunctionDecl(intrinsicFuncDecl); - } else { - intrinsicFuncDecl = (*insertResult.first).getFunctionDecl(); - } + // Get or create the overload we're interested in. + FunctionDecl *intrinsicFuncDecl = nullptr; + std::pair insertResult = + m_usedIntrinsics.insert( + UsedIntrinsic(pIntrinsic, functionArgTypes)); + bool insertedNewValue = insertResult.second; + if (insertedNewValue) { + DXASSERT(tableName, + "otherwise IDxcIntrinsicTable::GetTableName() failed"); + intrinsicFuncDecl = + AddHLSLIntrinsicFunction(*m_context, T.NS, tableName, lowering, + pIntrinsic, &functionArgTypes); + insertResult.first->setFunctionDecl(intrinsicFuncDecl); + } else { + intrinsicFuncDecl = (*insertResult.first).getFunctionDecl(); + } - OverloadCandidate &candidate = CandidateSet.addCandidate(Args.size()); - candidate.Function = intrinsicFuncDecl; - candidate.FoundDecl.setDecl(intrinsicFuncDecl); - candidate.Viable = argsMatch; - CandidateSet.isNewCandidate(intrinsicFuncDecl); // used to insert into set - if (argsMatch) - return true; - if (badArgIdx) { - candidate.FailureKind = ovl_fail_bad_conversion; - QualType ParamType = - intrinsicFuncDecl->getParamDecl(badArgIdx - 1)->getType(); - candidate.Conversions[badArgIdx - 1].setBad( - BadConversionSequence::no_conversion, Args[badArgIdx - 1], - ParamType); - } else { - // A less informative error. Needed when the failure relates to the - // return type - candidate.FailureKind = ovl_fail_bad_final_conversion; + OverloadCandidate &candidate = CandidateSet.addCandidate(Args.size()); + candidate.Function = intrinsicFuncDecl; + candidate.FoundDecl.setDecl(intrinsicFuncDecl); + candidate.Viable = argsMatch; + CandidateSet.isNewCandidate( + intrinsicFuncDecl); // used to insert into set + if (argsMatch) + return true; + if (badArgIdx) { + candidate.FailureKind = ovl_fail_bad_conversion; + QualType ParamType = + intrinsicFuncDecl->getParamDecl(badArgIdx - 1)->getType(); + candidate.Conversions[badArgIdx - 1].setBad( + BadConversionSequence::no_conversion, Args[badArgIdx - 1], + ParamType); + } else { + // A less informative error. 
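For reference, a minimal sketch of the get-or-create pattern used above for intrinsic overloads: the insert reports whether the entry is new, and only a new entry triggers creation of the declaration; later lookups reuse the cached one. The key type and FunctionDecl struct below are stand-ins, not the UsedIntrinsic machinery itself.

#include <cassert>
#include <map>
#include <string>

struct FunctionDecl { std::string Name; };

std::map<std::string, FunctionDecl *> UsedIntrinsics;

FunctionDecl *GetOrCreateOverload(const std::string &Key) {
  auto InsertResult = UsedIntrinsics.insert({Key, nullptr});
  if (InsertResult.second)  // newly inserted: build the declaration exactly once
    InsertResult.first->second = new FunctionDecl{Key};
  return InsertResult.first->second;
}

int main() {
  FunctionDecl *First = GetOrCreateOverload("dot(float3,float3)");
  FunctionDecl *Second = GetOrCreateOverload("dot(float3,float3)");
  assert(First == Second);  // the same overload is reused, not rebuilt
  delete First;
}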
Needed when the failure relates to the + // return type + candidate.FailureKind = ovl_fail_bad_final_conversion; + } } } @@ -5394,7 +5430,17 @@ class HLSLExternalSource : public ExternalSemaSource { objectKind = ClassifyRecordType(recordType); switch (objectKind) { case AR_TOBJ_OBJECT: - m_sema->Diag(argLoc, diag::err_hlsl_objectintemplateargument) << type; +#ifdef ENABLE_SPIRV_CODEGEN + if (const auto *namespaceDecl = dyn_cast( + recordType->getDecl()->getDeclContext()); + namespaceDecl && namespaceDecl->getName().equals("vk") && + (recordType->getDecl()->getName().equals("SpirvType") || + recordType->getDecl()->getName().equals("SpirvOpaqueType"))) { + return true; + } +#endif + m_sema->Diag(argLoc, diag::err_hlsl_unsupported_object_context) + << type << static_cast(TypeDiagContext::TypeParameter); return false; case AR_TOBJ_COMPOUND: { const RecordDecl *recordDecl = recordType->getDecl(); @@ -5533,14 +5579,27 @@ class HLSLExternalSource : public ExternalSemaSource { m_sema->RequireCompleteType(argSrcLoc, argType, diag::err_typecheck_decl_incomplete_type); - if (ContainsLongVector(argType)) { - const unsigned ConstantBuffersOrTextureBuffersIdx = 0; - m_sema->Diag(argSrcLoc, diag::err_hlsl_unsupported_long_vector) - << ConstantBuffersOrTextureBuffersIdx; + TypeDiagContext DiagContext = + TypeDiagContext::ConstantBuffersOrTextureBuffers; + if (DiagnoseTypeElements(*m_sema, argSrcLoc, argType, DiagContext, + DiagContext)) return true; - } } return false; + } else if (ResAttr && DXIL::IsStructuredBuffer(ResAttr->getResKind())) { + if (TemplateArgList.size() == 1) { + const TemplateArgumentLoc &ArgLoc = TemplateArgList[0]; + const TemplateArgument &Arg = ArgLoc.getArgument(); + if (Arg.getKind() == TemplateArgument::ArgKind::Type) { + QualType ArgType = Arg.getAsType(); + SourceLocation ArgSrcLoc = ArgLoc.getLocation(); + if (DiagnoseTypeElements( + *m_sema, ArgSrcLoc, ArgType, + TypeDiagContext::StructuredBuffers /*ObjDiagContext*/, + TypeDiagContext::Valid /*LongVecDiagContext*/)) + return true; + } + } } else if (Template->getTemplatedDecl()->hasAttr()) { @@ -5641,13 +5700,10 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (ContainsLongVector(arg.getAsType())) { - const unsigned TessellationPatchesIDx = 1; - m_sema->Diag(argLoc.getLocation(), - diag::err_hlsl_unsupported_long_vector) - << TessellationPatchesIDx; + const TypeDiagContext DiagContext = TypeDiagContext::TessellationPatches; + if (DiagnoseTypeElements(*m_sema, argLoc.getLocation(), arg.getAsType(), + DiagContext, DiagContext)) return true; - } } else if (Template->getTemplatedDecl()->hasAttr()) { DXASSERT(TemplateArgList.size() > 0, "Geometry streams should have at least one template args"); @@ -5660,13 +5716,10 @@ class HLSLExternalSource : public ExternalSemaSource { CXXRecordDecl *Decl = arg.getAsType()->getAsCXXRecordDecl(); if (Decl && !Decl->isCompleteDefinition()) return true; - if (ContainsLongVector(arg.getAsType())) { - const unsigned GeometryStreamsIdx = 2; - m_sema->Diag(argLoc.getLocation(), - diag::err_hlsl_unsupported_long_vector) - << GeometryStreamsIdx; + const TypeDiagContext DiagContext = TypeDiagContext::GeometryStreams; + if (DiagnoseTypeElements(*m_sema, argLoc.getLocation(), arg.getAsType(), + DiagContext, DiagContext)) return true; - } } bool isMatrix = Template->getCanonicalDecl() == @@ -5945,6 +5998,8 @@ class HLSLExternalSource : public ExternalSemaSource { "otherwise 
caller didn't initialize - there should be at least a " "void return type"); + const bool IsStatic = IsStaticMember(intrinsic); + // Create the template arguments. SmallVector templateArgs; for (size_t i = 0; i < parameterTypeCount; i++) { @@ -6010,15 +6065,19 @@ class HLSLExternalSource : public ExternalSemaSource { SmallVector Params; for (unsigned int i = 1; i < parameterTypeCount; i++) { + // The first parameter in the HLSL intrinsic record is just the intrinsic + // name and aliases with the 'this' pointer for non-static members. Skip + // this first parameter for static functions. + unsigned ParamIdx = IsStatic ? i : i - 1; IdentifierInfo *id = - &m_context->Idents.get(StringRef(intrinsic->pArgs[i - 1].pName)); + &m_context->Idents.get(StringRef(intrinsic->pArgs[ParamIdx].pName)); ParmVarDecl *paramDecl = ParmVarDecl::Create( *m_context, nullptr, NoLoc, NoLoc, id, parameterTypes[i], nullptr, StorageClass::SC_None, nullptr, paramMods[i - 1]); Params.push_back(paramDecl); } - StorageClass SC = IsStaticMember(intrinsic) ? SC_Static : SC_Extern; + StorageClass SC = IsStatic ? SC_Static : SC_Extern; QualType T = TInfo->getType(); DeclarationNameInfo NameInfo(FunctionTemplate->getDeclName(), NoLoc); CXXMethodDecl *method = CXXMethodDecl::Create( @@ -6731,8 +6790,8 @@ bool HLSLExternalSource::MatchArguments( (iArg != retArgIdx && retTypeIdx == pIntrinsicArg->uComponentTypeId); // For literal arg which don't affect return type, find concrete type. // For literal arg affect return type, - // TryEvalIntrinsic in CGHLSLMS.cpp will take care of cases - // where all argumentss are literal. + // TryEvalIntrinsic in CGHLSLMSFinishCodeGen.cpp will take care of + // cases where all arguments are literal. // CombineBasicTypes will cover the rest cases. if (!affectRetType) { TypeInfoEltKind = @@ -10770,6 +10829,26 @@ HLSLExternalSource::ApplyTypeSpecSignToParsedType(clang::QualType &type, } } +bool CheckIntersectionAttributeArg(Sema &S, Expr *E) { + SourceLocation Loc = E->getExprLoc(); + QualType Ty = E->getType(); + + // Identify problematic fields first (high diagnostic accuracy, may miss some + // invalid cases) + const TypeDiagContext DiagContext = TypeDiagContext::Attributes; + if (DiagnoseTypeElements(S, Loc, Ty, DiagContext, DiagContext)) + return true; + + // Must be a UDT (low diagnostic accuracy, catches remaining invalid cases) + if (Ty.isNull() || !hlsl::IsHLSLCopyableAnnotatableRecord(Ty)) { + S.Diag(Loc, diag::err_payload_attrs_must_be_udt) + << /*payload|attributes|callable*/ 1 << /*parameter %2|type*/ 1; + return true; + } + + return false; +} + Sema::TemplateDeductionResult HLSLExternalSource::DeduceTemplateArgumentsForHLSL( FunctionTemplateDecl *FunctionTemplate, @@ -10892,28 +10971,38 @@ HLSLExternalSource::DeduceTemplateArgumentsForHLSL( IsBABLoad = intrinsicOp == (UINT)IntrinsicOp::MOP_Load; IsBABStore = intrinsicOp == (UINT)IntrinsicOp::MOP_Store; } - if (ExplicitTemplateArgs && ExplicitTemplateArgs->size() > 0) { - bool isLegalTemplate = false; + if (ExplicitTemplateArgs && ExplicitTemplateArgs->size() >= 1) { SourceLocation Loc = ExplicitTemplateArgs->getLAngleLoc(); - auto TemplateDiag = diag::err_hlsl_intrinsic_template_arg_unsupported; - if (ExplicitTemplateArgs->size() >= 1 && (IsBABLoad || IsBABStore)) { - TemplateDiag = diag::err_hlsl_intrinsic_template_arg_requires_2018; - Loc = (*ExplicitTemplateArgs)[0].getLocation(); - if (Is2018) { - TemplateDiag = diag::err_hlsl_intrinsic_template_arg_numeric; - if (ExplicitTemplateArgs->size() == 1 && - 
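A small sketch of the record-index adjustment made above, assuming (as the new comment states) that slot 0 of an intrinsic's argument record holds the intrinsic name, which doubles as the implicit 'this' entry for non-static members and is skipped for static ones. The function name is illustrative.

#include <cassert>

// Map the 1-based loop index over explicit parameters to the slot in the
// intrinsic argument record that supplies the parameter name.
unsigned RecordIndexForParam(unsigned LoopIdx /*starts at 1*/, bool IsStatic) {
  return IsStatic ? LoopIdx : LoopIdx - 1;
}

int main() {
  // The first explicit parameter of a static method reads record slot 1;
  // a non-static method reads slot 0, which aliases the name/'this' entry.
  assert(RecordIndexForParam(1, /*IsStatic=*/true) == 1);
  assert(RecordIndexForParam(1, /*IsStatic=*/false) == 0);
}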
!functionTemplateTypeArg.isNull() && - hlsl::IsHLSLNumericOrAggregateOfNumericType( - functionTemplateTypeArg)) { - isLegalTemplate = true; - } - } + if (!IsBABLoad && !IsBABStore) { + getSema()->Diag(Loc, diag::err_hlsl_intrinsic_template_arg_unsupported) + << intrinsicName; + return Sema::TemplateDeductionResult::TDK_Invalid; } - - if (!isLegalTemplate) { - getSema()->Diag(Loc, TemplateDiag) << intrinsicName; + Loc = (*ExplicitTemplateArgs)[0].getLocation(); + if (!Is2018) { + getSema()->Diag(Loc, + diag::err_hlsl_intrinsic_template_arg_requires_2018) + << intrinsicName; return Sema::TemplateDeductionResult::TDK_Invalid; } + + if (IsBABLoad || IsBABStore) { + const bool IsNull = functionTemplateTypeArg.isNull(); + // Incomplete type is diagnosed elsewhere, so just fail if incomplete. + if (!IsNull && + getSema()->RequireCompleteType(Loc, functionTemplateTypeArg, 0)) + return Sema::TemplateDeductionResult::TDK_Invalid; + if (IsNull || !hlsl::IsHLSLNumericOrAggregateOfNumericType( + functionTemplateTypeArg)) { + getSema()->Diag(Loc, diag::err_hlsl_intrinsic_template_arg_numeric) + << intrinsicName; + DiagnoseTypeElements( + *getSema(), Loc, functionTemplateTypeArg, + TypeDiagContext::TypeParameter /*ObjDiagContext*/, + TypeDiagContext::Valid /*LongVecDiagContext*/); + return Sema::TemplateDeductionResult::TDK_Invalid; + } + } } else if (IsBABStore) { // Prior to HLSL 2018, Store operation only stored scalar uint. if (!Is2018) { @@ -11630,6 +11719,537 @@ static bool CheckBarrierCall(Sema &S, FunctionDecl *FD, CallExpr *CE, return false; } +// MatVec Ops +static const unsigned kMatVecMulOutputVectorIdx = 0; +static const unsigned kMatVecMulOutputIsUnsignedIdx = 1; +static const unsigned kMatVecMulInputVectorIdx = 2; +static const unsigned kMatVecMulIsInputUnsignedIdx = 3; +static const unsigned kMatVecMulInputInterpretationIdx = 4; +// static const unsigned kMatVecMulMatrixBufferIdx = 5; +// static const unsigned kMatVecMulMatrixOffsetIdx = 6; +static const unsigned kMatVecMulMatrixInterpretationIdx = 7; +static const unsigned kMatVecMulMatrixMIdx = 8; +static const unsigned kMatVecMulMatrixKIdx = 9; +static const unsigned kMatVecMulMatrixLayoutIdx = 10; +static const unsigned kMatVecMulMatrixTransposeIdx = 11; +static const unsigned kMatVecMulMatrixStrideIdx = 12; + +// MatVecAdd +const unsigned kMatVecMulAddBiasInterpretation = 15; + +static bool IsValidMatrixLayoutForMulAndMulAddOps(unsigned Layout) { + return Layout <= + static_cast(DXIL::LinalgMatrixLayout::OuterProductOptimal); +} + +static bool IsOptimalTypeMatrixLayout(unsigned Layout) { + return ( + Layout == (static_cast(DXIL::LinalgMatrixLayout::MulOptimal)) || + (Layout == + (static_cast(DXIL::LinalgMatrixLayout::OuterProductOptimal)))); +} + +static bool IsValidTransposeForMatrixLayout(unsigned Layout, bool Transposed) { + switch (static_cast(Layout)) { + case DXIL::LinalgMatrixLayout::RowMajor: + case DXIL::LinalgMatrixLayout::ColumnMajor: + return !Transposed; + + default: + return true; + } +} + +static bool IsPackedType(unsigned type) { + return (type == static_cast(DXIL::ComponentType::PackedS8x32) || + type == static_cast(DXIL::ComponentType::PackedU8x32)); +} + +static bool IsValidLinalgTypeInterpretation(uint32_t Input, bool InRegister) { + + switch (static_cast(Input)) { + case DXIL::ComponentType::I16: + case DXIL::ComponentType::U16: + case DXIL::ComponentType::I32: + case DXIL::ComponentType::U32: + case DXIL::ComponentType::F16: + case DXIL::ComponentType::F32: + case DXIL::ComponentType::U8: + case 
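For reference, a standalone sketch of the two layout rules added above. The enum values are assumed here only so the sketch compiles; the patch relies on the real DXIL::LinalgMatrixLayout constants.

#include <cassert>

enum class LinalgMatrixLayout : unsigned {
  RowMajor = 0,
  ColumnMajor = 1,
  MulOptimal = 2,
  OuterProductOptimal = 3,
};

// Mirrors IsValidMatrixLayoutForMulAndMulAddOps: any value up to and including
// OuterProductOptimal is a recognized layout.
bool IsValidLayout(unsigned Layout) {
  return Layout <=
         static_cast<unsigned>(LinalgMatrixLayout::OuterProductOptimal);
}

// Mirrors IsValidTransposeForMatrixLayout: the explicit row/column-major
// layouts may not be transposed; the opaque "optimal" layouts may be.
bool IsValidTranspose(unsigned Layout, bool Transposed) {
  switch (static_cast<LinalgMatrixLayout>(Layout)) {
  case LinalgMatrixLayout::RowMajor:
  case LinalgMatrixLayout::ColumnMajor:
    return !Transposed;
  default:
    return true;
  }
}

int main() {
  assert(!IsValidLayout(7));
  assert(!IsValidTranspose(0, /*Transposed=*/true)); // row-major: not allowed
  assert(IsValidTranspose(3, /*Transposed=*/true));  // outer-product-optimal: allowed
}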
DXIL::ComponentType::I8: + case DXIL::ComponentType::F8_E4M3: + case DXIL::ComponentType::F8_E5M2: + return true; + case DXIL::ComponentType::PackedS8x32: + case DXIL::ComponentType::PackedU8x32: + return InRegister; + default: + return false; + } +} + +static bool IsValidVectorAndMatrixDimensions(Sema &S, CallExpr *CE, + unsigned InputVectorSize, + unsigned OutputVectorSize, + unsigned MatrixK, unsigned MatrixM, + bool isInputPacked) { + // Check if output vector size equals to matrix dimension M + if (OutputVectorSize != MatrixM) { + Expr *OutputVector = CE->getArg(kMatVecMulOutputVectorIdx); + S.Diags.Report( + OutputVector->getExprLoc(), + diag:: + err_hlsl_linalg_mul_muladd_output_vector_size_not_equal_to_matrix_M); + return false; + } + + // Check if input vector size equals to matrix dimension K in the unpacked + // case. + // Check if input vector size equals the smallest number that can hold + // matrix dimension K values + const unsigned PackingFactor = isInputPacked ? 4 : 1; + unsigned MinInputVectorSize = (MatrixK + PackingFactor - 1) / PackingFactor; + if (InputVectorSize != MinInputVectorSize) { + Expr *InputVector = CE->getArg(kMatVecMulInputVectorIdx); + if (isInputPacked) { + S.Diags.Report( + InputVector->getExprLoc(), + diag::err_hlsl_linalg_mul_muladd_packed_input_vector_size_incorrect); + return false; + } else { + S.Diags.Report( + InputVector->getExprLoc(), + diag:: + err_hlsl_linalg_mul_muladd_unpacked_input_vector_size_not_equal_to_matrix_K); + return false; + } + } + + return true; +} + +static void CheckCommonMulAndMulAddParameters(Sema &S, CallExpr *CE, + const hlsl::ShaderModel *SM) { + // Check if IsOutputUnsigned is a const parameter + bool IsOutputUnsignedFlagValue = false; + Expr *IsOutputUnsignedExpr = CE->getArg(kMatVecMulOutputIsUnsignedIdx); + llvm::APSInt IsOutputUnsignedExprVal; + if (IsOutputUnsignedExpr->isIntegerConstantExpr(IsOutputUnsignedExprVal, + S.Context)) { + IsOutputUnsignedFlagValue = IsOutputUnsignedExprVal.getBoolValue(); + } else { + S.Diags.Report(IsOutputUnsignedExpr->getExprLoc(), diag::err_expr_not_ice) + << 0; + return; + } + + Expr *OutputVectorExpr = CE->getArg(kMatVecMulOutputVectorIdx); + unsigned OutputVectorSizeValue = 0; + if (IsHLSLVecType(OutputVectorExpr->getType())) { + OutputVectorSizeValue = GetHLSLVecSize(OutputVectorExpr->getType()); + QualType OutputVectorType = + GetHLSLVecElementType(OutputVectorExpr->getType()); + const Type *OutputVectorTypePtr = OutputVectorType.getTypePtr(); + + // Check if IsOutputUnsigned flag matches output vector type. + // Must be true for unsigned int outputs, false for signed int/float + // outputs. + if (IsOutputUnsignedFlagValue && + !OutputVectorTypePtr->isUnsignedIntegerType()) { + DXASSERT_NOMSG(OutputVectorTypePtr->isSignedIntegerType() || + OutputVectorTypePtr->isFloatingType()); + S.Diags.Report(IsOutputUnsignedExpr->getExprLoc(), + diag::err_hlsl_linalg_isunsigned_incorrect_for_given_type) + << "IsOuputUnsigned" << false + << (OutputVectorTypePtr->isSignedIntegerType() ? 
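A minimal sketch of the dimension rule enforced by IsValidVectorAndMatrixDimensions above: the output vector must have exactly M elements, and the input vector must have ceil(K / PackingFactor) elements, where the packing factor is 4 for the packed (u)int8 interpretations and 1 otherwise. Function and parameter names are illustrative.

#include <cassert>

bool DimensionsAreValid(unsigned InputVectorSize, unsigned OutputVectorSize,
                        unsigned MatrixK, unsigned MatrixM, bool InputPacked) {
  if (OutputVectorSize != MatrixM)
    return false;
  const unsigned PackingFactor = InputPacked ? 4 : 1;
  // Smallest vector that can hold K values at the given packing: ceil(K / factor).
  const unsigned MinInputVectorSize =
      (MatrixK + PackingFactor - 1) / PackingFactor;
  return InputVectorSize == MinInputVectorSize;
}

int main() {
  // Unpacked: the input length must equal K exactly.
  assert(DimensionsAreValid(/*In*/ 16, /*Out*/ 8, /*K*/ 16, /*M*/ 8, false));
  // Packed int8: 18 values fit in ceil(18/4) = 5 32-bit elements.
  assert(DimensionsAreValid(/*In*/ 5, /*Out*/ 8, /*K*/ 18, /*M*/ 8, true));
  assert(!DimensionsAreValid(/*In*/ 4, /*Out*/ 8, /*K*/ 18, /*M*/ 8, true));
}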
1 : 0); + return; + } else if (!IsOutputUnsignedFlagValue && + OutputVectorTypePtr->isUnsignedIntegerType()) { + S.Diags.Report(IsOutputUnsignedExpr->getExprLoc(), + diag::err_hlsl_linalg_isunsigned_incorrect_for_given_type) + << "IsOuputUnsigned" << true << 2; + return; + } + } + + // Check if isInputUnsigned parameter is a constant + bool IsInputUnsignedFlagValue = false; + Expr *IsInputUnsignedExpr = CE->getArg(kMatVecMulIsInputUnsignedIdx); + llvm::APSInt IsInputUnsignedExprVal; + if (IsInputUnsignedExpr->isIntegerConstantExpr(IsInputUnsignedExprVal, + S.Context)) { + IsInputUnsignedFlagValue = IsInputUnsignedExprVal.getBoolValue(); + } else { + S.Diags.Report(IsInputUnsignedExpr->getExprLoc(), diag::err_expr_not_ice) + << 0; + return; + } + + // Get InputInterpretation, check if it is constant + Expr *InputInterpretationExpr = CE->getArg(kMatVecMulInputInterpretationIdx); + llvm::APSInt InputInterpretationExprVal; + unsigned InputInterpretationValue = 0; + if (InputInterpretationExpr->isIntegerConstantExpr(InputInterpretationExprVal, + S.Context)) { + InputInterpretationValue = InputInterpretationExprVal.getLimitedValue(); + const bool InRegisterInterpretation = true; + if (!IsValidLinalgTypeInterpretation(InputInterpretationValue, + InRegisterInterpretation)) { + S.Diags.Report(InputInterpretationExpr->getExprLoc(), + diag::err_hlsl_linalg_interpretation_value_incorrect) + << std::to_string(InputInterpretationValue) + << InRegisterInterpretation; + return; + } + } else { + S.Diags.Report(InputInterpretationExpr->getExprLoc(), + diag::err_expr_not_ice) + << 0; + return; + } + + bool IsInputVectorPacked = IsPackedType(InputInterpretationValue); + + // For packed types input vector type must be uint and isUnsigned must be + // true. The signedness is determined from the InputInterpretation + Expr *InputVectorExpr = CE->getArg(kMatVecMulInputVectorIdx); + unsigned InputVectorSizeValue = 0; + if (IsHLSLVecType(InputVectorExpr->getType())) { + InputVectorSizeValue = GetHLSLVecSize(InputVectorExpr->getType()); + QualType InputVectorType = + GetHLSLVecElementType(InputVectorExpr->getType()); + unsigned BitWidth = S.Context.getTypeSize(InputVectorType); + bool Is32Bit = (BitWidth == 32); + const Type *InputVectorTypePtr = InputVectorType.getTypePtr(); + + // Check if the isUnsigned flag setting + if (IsInputVectorPacked) { + // Check that the input vector element type is "32bit" + if (!Is32Bit) { + S.Diags.Report( + InputVectorExpr->getExprLoc(), + diag::err_hlsl_linalg_mul_muladd_packed_input_vector_must_be_uint); + return; + } + + // Check that the input vector element type is an unsigned int + if (!InputVectorTypePtr->isUnsignedIntegerType()) { + S.Diags.Report( + InputVectorExpr->getExprLoc(), + diag::err_hlsl_linalg_mul_muladd_packed_input_vector_must_be_uint); + return; + } + + // Check that isInputUnsigned is always true + // Actual signedness is inferred from the InputInterpretation + if (!IsInputUnsignedFlagValue) { + S.Diags.Report( + IsInputUnsignedExpr->getExprLoc(), + diag:: + err_hlsl_linalg_mul_muladd_isUnsigned_for_packed_input_must_be_true); + return; + } + } else { + if (IsInputUnsignedFlagValue && + !InputVectorTypePtr->isUnsignedIntegerType()) { + DXASSERT_NOMSG(InputVectorTypePtr->isSignedIntegerType() || + InputVectorTypePtr->isFloatingType()); + S.Diags.Report( + IsInputUnsignedExpr->getExprLoc(), + diag::err_hlsl_linalg_isunsigned_incorrect_for_given_type) + << "IsInputUnsigned" << false + << (InputVectorTypePtr->isSignedIntegerType() ? 
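A sketch of the signedness-consistency rule applied above to the output vector and, further down, to unpacked input vectors: the IsUnsigned flag must be true exactly when the vector's element type is an unsigned integer, and false for signed integer or floating-point elements. The helper below is a simplified stand-in for the clang type queries.

#include <cassert>

enum class ElemClass { UnsignedInt, SignedInt, Float };

bool UnsignedFlagMatchesElement(bool IsUnsignedFlag, ElemClass Elem) {
  if (IsUnsignedFlag)
    return Elem == ElemClass::UnsignedInt; // flag true requires an unsigned element
  return Elem != ElemClass::UnsignedInt;   // flag false forbids an unsigned element
}

int main() {
  assert(UnsignedFlagMatchesElement(true, ElemClass::UnsignedInt));
  assert(!UnsignedFlagMatchesElement(true, ElemClass::Float));
  assert(!UnsignedFlagMatchesElement(false, ElemClass::UnsignedInt));
}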
1 : 0); + return; + } else if (!IsInputUnsignedFlagValue && + InputVectorTypePtr->isUnsignedIntegerType()) { + S.Diags.Report( + IsInputUnsignedExpr->getExprLoc(), + diag::err_hlsl_linalg_isunsigned_incorrect_for_given_type) + << "IsInputUnsigned" << true << 2; + return; + } + } + } + + // Get Matrix Dimensions M and K, check if they are constants + Expr *MatrixKExpr = CE->getArg(kMatVecMulMatrixKIdx); + llvm::APSInt MatrixKExprVal; + unsigned MatrixKValue = 0; + if (MatrixKExpr->isIntegerConstantExpr(MatrixKExprVal, S.Context)) { + MatrixKValue = MatrixKExprVal.getLimitedValue(); + } else { + S.Diags.Report(MatrixKExpr->getExprLoc(), diag::err_expr_not_ice) << 0; + return; + } + + Expr *MatrixMExpr = CE->getArg(kMatVecMulMatrixMIdx); + llvm::APSInt MatrixMExprVal; + unsigned MatrixMValue = 0; + if (MatrixMExpr->isIntegerConstantExpr(MatrixMExprVal, S.Context)) { + MatrixMValue = MatrixMExprVal.getLimitedValue(); + } else { + S.Diags.Report(MatrixMExpr->getExprLoc(), diag::err_expr_not_ice) << 0; + return; + } + + // Check MatrixM and MatrixK values are non-zero + if (MatrixMValue == 0) { + S.Diags.Report(MatrixMExpr->getExprLoc(), + diag::err_hlsl_linalg_matrix_dim_must_be_greater_than_zero) + << std::to_string(DXIL::kSM69MaxVectorLength); + return; + } + + if (MatrixKValue == 0) { + S.Diags.Report(MatrixKExpr->getExprLoc(), + diag::err_hlsl_linalg_matrix_dim_must_be_greater_than_zero) + << std::to_string(DXIL::kSM69MaxVectorLength); + return; + } + + // Check MatrixM and MatrixK values are less than max + // Matrix dimension cannot exceed largest vector length in a Mul/MulAdd + // operation. + if (MatrixMValue > DXIL::kSM69MaxVectorLength) { + S.Diags.Report(MatrixMExpr->getExprLoc(), + diag::err_hlsl_linalg_mul_muladd_invalid_dim) + << 0 << std::to_string(DXIL::kSM69MaxVectorLength); + return; + } + + // For packed input vectors 4 values are packed in a uint, so max Matrix K + // can be 4096 + if (IsInputVectorPacked) { + const unsigned PackingFactor = + 4; // Only supported packed formats: DATA_TYPE_(U)SINT8_T4_PACKED + if (MatrixKValue > DXIL::kSM69MaxVectorLength * PackingFactor) { + S.Diags.Report(MatrixKExpr->getExprLoc(), + diag::err_hlsl_linalg_mul_muladd_invalid_dim) + << 2 << std::to_string(DXIL::kSM69MaxVectorLength * PackingFactor); + return; + } + } else { + if (MatrixKValue > DXIL::kSM69MaxVectorLength) { + S.Diags.Report(MatrixKExpr->getExprLoc(), + diag::err_hlsl_linalg_mul_muladd_invalid_dim) + << 1 << std::to_string(DXIL::kSM69MaxVectorLength); + return; + } + } + + if (!IsValidVectorAndMatrixDimensions(S, CE, InputVectorSizeValue, + OutputVectorSizeValue, MatrixKValue, + MatrixMValue, IsInputVectorPacked)) { + return; + } + + // Get MatrixInterpretation, check if it is constant + // Make sure it is a valid value + Expr *MatrixInterpretationExpr = + CE->getArg(kMatVecMulMatrixInterpretationIdx); + llvm::APSInt MatrixInterpretationExprVal; + unsigned MatrixInterpretationValue = 0; + if (MatrixInterpretationExpr->isIntegerConstantExpr( + MatrixInterpretationExprVal, S.Context)) { + MatrixInterpretationValue = MatrixInterpretationExprVal.getLimitedValue(); + const bool InRegisterInterpretation = false; + if (!IsValidLinalgTypeInterpretation(MatrixInterpretationValue, + InRegisterInterpretation)) { + S.Diags.Report(MatrixInterpretationExpr->getExprLoc(), + diag::err_hlsl_linalg_interpretation_value_incorrect) + << std::to_string(MatrixInterpretationValue) + << InRegisterInterpretation; + return; + } + } else { + S.Diags.Report(MatrixInterpretationExpr->getExprLoc(), + 
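For reference, a sketch of the dimension bounds checked above: M and K must be non-zero, M is capped at the maximum vector length, and K is capped at four times that limit when the input vector is packed, since four packed 8-bit values share one 32-bit element. The value 1024 for kSM69MaxVectorLength is an assumption for this sketch; the patch uses the real DXIL constant.

#include <cassert>

static const unsigned kSM69MaxVectorLength = 1024; // assumed for the sketch

bool MatrixDimsInRange(unsigned M, unsigned K, bool InputPacked) {
  if (M == 0 || K == 0)
    return false;                       // dimensions must be greater than zero
  if (M > kSM69MaxVectorLength)
    return false;                       // M bounded by the max vector length
  const unsigned MaxK =
      InputPacked ? kSM69MaxVectorLength * 4 : kSM69MaxVectorLength;
  return K <= MaxK;                     // packed inputs allow K up to 4x the limit
}

int main() {
  assert(MatrixDimsInRange(1024, 4096, /*InputPacked=*/true));
  assert(!MatrixDimsInRange(1024, 4096, /*InputPacked=*/false));
  assert(!MatrixDimsInRange(0, 16, false));
}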
diag::err_expr_not_ice) + << 0; + return; + } + + // Get MatrixLayout, check if it is constant and valid value + Expr *MatrixLayoutExpr = CE->getArg(kMatVecMulMatrixLayoutIdx); + llvm::APSInt MatrixLayoutExprVal; + unsigned MatrixLayoutValue = 0; + if (MatrixLayoutExpr->isIntegerConstantExpr(MatrixLayoutExprVal, S.Context)) { + MatrixLayoutValue = MatrixLayoutExprVal.getLimitedValue(); + if (!IsValidMatrixLayoutForMulAndMulAddOps(MatrixLayoutValue)) { + S.Diags.Report(MatrixLayoutExpr->getExprLoc(), + diag::err_hlsl_linalg_matrix_layout_invalid) + << std::to_string(MatrixLayoutValue) + << std::to_string( + static_cast(DXIL::LinalgMatrixLayout::RowMajor)) + << std::to_string(static_cast( + DXIL::LinalgMatrixLayout::OuterProductOptimal)); + return; + } + } else { + S.Diags.Report(MatrixLayoutExpr->getExprLoc(), diag::err_expr_not_ice) << 0; + return; + } + + // Get MatrixTranspose, check if it is constant + Expr *MatrixTransposeExpr = CE->getArg(kMatVecMulMatrixTransposeIdx); + llvm::APSInt MatrixTransposeExprVal; + unsigned MatrixTransposeValue = 0; + if (MatrixTransposeExpr->isIntegerConstantExpr(MatrixTransposeExprVal, + S.Context)) { + MatrixTransposeValue = MatrixTransposeExprVal.getBoolValue(); + if (!IsValidTransposeForMatrixLayout(MatrixLayoutValue, + MatrixTransposeValue)) { + + S.Diags.Report(MatrixTransposeExpr->getExprLoc(), + diag::err_hlsl_linalg_matrix_layout_is_not_transposable); + return; + } + } else { + S.Diags.Report(MatrixTransposeExpr->getExprLoc(), diag::err_expr_not_ice) + << 0; + return; + } + + // Get MatrixStride, check if it is constant, if yes it should be zero + // for optimal layouts + Expr *MatrixStrideExpr = CE->getArg(kMatVecMulMatrixStrideIdx); + llvm::APSInt MatrixStrideExprVal; + unsigned MatrixStrideValue = 0; + if (MatrixStrideExpr->isIntegerConstantExpr(MatrixStrideExprVal, S.Context)) { + MatrixStrideValue = MatrixStrideExprVal.getLimitedValue(); + if (IsOptimalTypeMatrixLayout(MatrixLayoutValue) && + MatrixStrideValue != 0) { + S.Diags.Report( + MatrixStrideExpr->getExprLoc(), + diag:: + err_hlsl_linalg_optimal_matrix_layout_matrix_stride_must_be_zero); + return; + } + } +} + +static void CheckMulCall(Sema &S, FunctionDecl *FD, CallExpr *CE, + const hlsl::ShaderModel *SM) { + CheckCommonMulAndMulAddParameters(S, CE, SM); +} + +static void CheckMulAddCall(Sema &S, FunctionDecl *FD, CallExpr *CE, + const hlsl::ShaderModel *SM) { + CheckCommonMulAndMulAddParameters(S, CE, SM); + + // Check if BiasInterpretation is constant and a valid value + Expr *BiasInterpretationExpr = CE->getArg(kMatVecMulAddBiasInterpretation); + llvm::APSInt BiasInterpretationExprVal; + unsigned BiasInterpretationValue = 0; + if (BiasInterpretationExpr->isIntegerConstantExpr(BiasInterpretationExprVal, + S.Context)) { + BiasInterpretationValue = BiasInterpretationExprVal.getLimitedValue(); + const bool InRegisterInterpretation = false; + if (!IsValidLinalgTypeInterpretation(BiasInterpretationValue, + InRegisterInterpretation)) { + S.Diags.Report(BiasInterpretationExpr->getExprLoc(), + diag::err_hlsl_linalg_interpretation_value_incorrect) + << std::to_string(BiasInterpretationValue) + << InRegisterInterpretation; + return; + } + } else { + S.Diags.Report(BiasInterpretationExpr->getExprLoc(), diag::err_expr_not_ice) + << 0; + return; + } +} + +// Linalg Outer Product Accumulate +// OuterProductAccumulate builtin function parameters +static const unsigned kOuterProdAccInputVector1Idx = 0; +static const unsigned kOuterProdAccInputVector2Idx = 1; +// static const unsigned 
kOuterProdAccMatrixBufferIdx = 2; +// static const unsigned kOuterProdAccMatrixOffsetIdx = 3; +static const unsigned kOuterProdAccMatrixInterpretationIdx = 4; +static const unsigned kOuterProdAccMatrixLayoutIdx = 5; +static const unsigned kOuterProdAccMatrixStrideIdx = 6; + +static void CheckOuterProductAccumulateCall(Sema &S, FunctionDecl *FD, + CallExpr *CE) { + // Check InputVector1 and InputVector2 are the same type + const Expr *InputVector1Expr = CE->getArg(kOuterProdAccInputVector1Idx); + const Expr *InputVector2Expr = CE->getArg(kOuterProdAccInputVector2Idx); + QualType InputVector1Type = InputVector1Expr->getType(); + QualType InputVector2Type = InputVector2Expr->getType(); + + // Get the element types of the vectors + const QualType InputVector1ElementType = + GetHLSLVecElementType(InputVector1Type); + const QualType InputVector2ElementType = + GetHLSLVecElementType(InputVector2Type); + + if (!S.Context.hasSameType(InputVector1ElementType, + InputVector2ElementType)) { + S.Diags.Report(InputVector2Expr->getExprLoc(), + diag::err_hlsl_linalg_outer_prod_acc_vector_type_mismatch); + return; + } + + // Check Matrix Interpretation is a constant and a valid value + Expr *MatrixInterpretationExpr = + CE->getArg(kOuterProdAccMatrixInterpretationIdx); + llvm::APSInt MatrixInterpretationExprVal; + unsigned MatrixInterpretationValue = 0; + if (MatrixInterpretationExpr->isIntegerConstantExpr( + MatrixInterpretationExprVal, S.Context)) { + MatrixInterpretationValue = MatrixInterpretationExprVal.getLimitedValue(); + const bool InRegisterInterpretation = false; + if (!IsValidLinalgTypeInterpretation(MatrixInterpretationValue, + InRegisterInterpretation)) { + S.Diags.Report(MatrixInterpretationExpr->getExprLoc(), + diag::err_hlsl_linalg_interpretation_value_incorrect) + << std::to_string(MatrixInterpretationValue) + << InRegisterInterpretation; + return; + } + } else { + S.Diags.Report(MatrixInterpretationExpr->getExprLoc(), + diag::err_expr_not_ice) + << 0; + return; + } + + // Check Matrix Layout must be a constant and Training Optimal + Expr *MatrixLayoutExpr = CE->getArg(kOuterProdAccMatrixLayoutIdx); + llvm::APSInt MatrixLayoutExprVal; + unsigned MatrixLayoutValue = 0; + if (MatrixLayoutExpr->isIntegerConstantExpr(MatrixLayoutExprVal, S.Context)) { + MatrixLayoutValue = MatrixLayoutExprVal.getLimitedValue(); + if (MatrixLayoutValue != + static_cast(DXIL::LinalgMatrixLayout::OuterProductOptimal)) { + S.Diags.Report( + MatrixLayoutExpr->getExprLoc(), + diag:: + err_hlsl_linalg_outer_prod_acc_matrix_layout_must_be_outer_prod_acc_optimal) + << std::to_string(static_cast( + DXIL::LinalgMatrixLayout::OuterProductOptimal)); + return; + } + } else { + S.Diags.Report(MatrixLayoutExpr->getExprLoc(), diag::err_expr_not_ice) << 0; + return; + } + + // Matrix Stride must be zero (Training Optimal matrix layout) + Expr *MatrixStrideExpr = CE->getArg(kOuterProdAccMatrixStrideIdx); + llvm::APSInt MatrixStrideExprVal; + unsigned MatrixStrideValue = 0; + if (MatrixStrideExpr->isIntegerConstantExpr(MatrixStrideExprVal, S.Context)) { + MatrixStrideValue = MatrixStrideExprVal.getLimitedValue(); + if (MatrixStrideValue != 0) { + S.Diags.Report( + MatrixStrideExpr->getExprLoc(), + diag:: + err_hlsl_linalg_optimal_matrix_layout_matrix_stride_must_be_zero); + return; + } + } +} + #ifdef ENABLE_SPIRV_CODEGEN static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, bool isStatic) { @@ -11656,9 +12276,78 @@ static bool CheckVKBufferPointerCast(Sema &S, FunctionDecl *FD, CallExpr *CE, } #endif 
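A compact sketch of the OuterProductAccumulate constraints checked in the function above: both input vectors must share an element type, the matrix layout must be the outer-product-optimal layout, and the stride must be zero because that layout is opaque. The layout value 3 and the string-based element-type comparison are assumptions for the sketch only.

#include <cassert>
#include <string>

static const unsigned kOuterProductOptimal = 3; // assumed numeric value

bool OuterProductArgsValid(const std::string &Elem1, const std::string &Elem2,
                           unsigned Layout, unsigned Stride) {
  if (Elem1 != Elem2)
    return false;                      // input vector element types must match
  if (Layout != kOuterProductOptimal)
    return false;                      // only the outer-product-optimal layout
  return Stride == 0;                  // optimal layouts are opaque: stride is 0
}

int main() {
  assert(OuterProductArgsValid("half", "half", 3, 0));
  assert(!OuterProductArgsValid("half", "float", 3, 0));
  assert(!OuterProductArgsValid("half", "half", 0, 0));
}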
+static bool isRelatedDeclMarkedNointerpolation(Expr *E) { + if (!E) + return false; + E = E->IgnoreCasts(); + if (auto *DRE = dyn_cast(E)) + return DRE->getDecl()->hasAttr(); + + if (auto *ME = dyn_cast(E)) + return ME->getMemberDecl()->hasAttr() || + isRelatedDeclMarkedNointerpolation(ME->getBase()); + + if (auto *HVE = dyn_cast(E)) + return isRelatedDeclMarkedNointerpolation(HVE->getBase()); + + if (auto *ASE = dyn_cast(E)) + return isRelatedDeclMarkedNointerpolation(ASE->getBase()); + + return false; +} + +static bool CheckIntrinsicGetAttributeAtVertex(Sema &S, FunctionDecl *FDecl, + CallExpr *TheCall) { + assert(TheCall->getNumArgs() > 0); + auto argument = TheCall->getArg(0)->IgnoreCasts(); + + if (!isRelatedDeclMarkedNointerpolation(argument)) { + S.Diag(argument->getExprLoc(), diag::err_hlsl_parameter_requires_attribute) + << 0 << FDecl->getName() << "nointerpolation"; + return true; + } + + return false; +} + +static bool CheckNoInterpolationParams(Sema &S, FunctionDecl *FDecl, + CallExpr *TheCall) { + // See #hlsl-specs/issues/181. Feature is broken. For SPIR-V we want + // to limit the scope, and fail gracefully in some cases. + if (!S.getLangOpts().SPIRV) + return false; + + bool error = false; + for (unsigned i = 0; i < FDecl->getNumParams(); i++) { + assert(i < TheCall->getNumArgs()); + + if (!FDecl->getParamDecl(i)->hasAttr()) + continue; + + if (!isRelatedDeclMarkedNointerpolation(TheCall->getArg(i))) { + S.Diag(TheCall->getArg(i)->getExprLoc(), + diag::err_hlsl_parameter_requires_attribute) + << i << FDecl->getName() << "nointerpolation"; + error = true; + } + } + + return error; +} + +// Verify that user-defined intrinsic struct args contain no long vectors +static bool CheckUDTIntrinsicArg(Sema &S, Expr *Arg) { + const TypeDiagContext DiagContext = + TypeDiagContext::UserDefinedStructParameter; + return DiagnoseTypeElements(S, Arg->getExprLoc(), Arg->getType(), DiagContext, + DiagContext); +} + // Check HLSL call constraints, not fatal to creating the AST. -void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, - const FunctionProtoType *Proto) { +void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { + if (CheckNoInterpolationParams(*this, FDecl, TheCall)) + return; + HLSLIntrinsicAttr *IntrinsicAttr = FDecl->getAttr(); if (!IntrinsicAttr) return; @@ -11677,6 +12366,37 @@ void Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall, case hlsl::IntrinsicOp::IOP_Barrier: CheckBarrierCall(*this, FDecl, TheCall, SM); break; + case hlsl::IntrinsicOp::IOP___builtin_MatVecMul: + CheckMulCall(*this, FDecl, TheCall, SM); + break; + case hlsl::IntrinsicOp::IOP___builtin_MatVecMulAdd: + CheckMulAddCall(*this, FDecl, TheCall, SM); + break; + case hlsl::IntrinsicOp::IOP___builtin_OuterProductAccumulate: + CheckOuterProductAccumulateCall(*this, FDecl, TheCall); + break; + case hlsl::IntrinsicOp::IOP_GetAttributeAtVertex: + // See #hlsl-specs/issues/181. Feature is broken. For SPIR-V we want + // to limit the scope, and fail gracefully in some cases. 
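A simplified model of the isRelatedDeclMarkedNointerpolation walk relocated above: the argument must ultimately name a declaration carrying the nointerpolation attribute, so the check descends through member accesses, vector-element accesses, and array subscripts down to the base reference. The Node struct is a toy stand-in for the relevant Expr nodes.

#include <cassert>
#include <memory>

struct Node {
  bool HasNoInterpolation = false; // attribute on the referenced decl, if any
  std::shared_ptr<Node> Base;      // base of a member/element/subscript expr
};

bool RelatedDeclMarkedNoInterpolation(const std::shared_ptr<Node> &E) {
  if (!E)
    return false;
  if (E->HasNoInterpolation)
    return true;
  return RelatedDeclMarkedNoInterpolation(E->Base);
}

int main() {
  // Models something like input.color[2] where 'input' is declared nointerpolation.
  auto DeclRef = std::make_shared<Node>();
  DeclRef->HasNoInterpolation = true;
  auto Member = std::make_shared<Node>();
  Member->Base = DeclRef;
  auto Subscript = std::make_shared<Node>();
  Subscript->Base = Member;
  assert(RelatedDeclMarkedNoInterpolation(Subscript));
}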
+ if (!getLangOpts().SPIRV) + return; + CheckIntrinsicGetAttributeAtVertex(*this, FDecl, TheCall); + break; + case hlsl::IntrinsicOp::IOP_DispatchMesh: + CheckUDTIntrinsicArg(*this, TheCall->getArg(3)->IgnoreCasts()); + break; + case hlsl::IntrinsicOp::IOP_CallShader: + CheckUDTIntrinsicArg(*this, TheCall->getArg(1)->IgnoreCasts()); + break; + case hlsl::IntrinsicOp::IOP_TraceRay: + CheckUDTIntrinsicArg(*this, TheCall->getArg(7)->IgnoreCasts()); + break; + case hlsl::IntrinsicOp::IOP_ReportHit: + CheckIntersectionAttributeArg(*this, TheCall->getArg(2)->IgnoreCasts()); + break; + case hlsl::IntrinsicOp::MOP_DxHitObject_GetAttributes: + CheckIntersectionAttributeArg(*this, TheCall->getArg(0)->IgnoreCasts()); + break; #ifdef ENABLE_SPIRV_CODEGEN case hlsl::IntrinsicOp::IOP_Vkreinterpret_pointer_cast: CheckVKBufferPointerCast(*this, FDecl, TheCall, false); @@ -12066,8 +12786,11 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, case hlsl::IntrinsicOp::MOP_TraceRayInline: DiagnoseTraceRayInline(*this, CE); break; + case hlsl::IntrinsicOp::MOP_DxHitObject_FromRayQuery: + case hlsl::IntrinsicOp::MOP_DxHitObject_Invoke: case hlsl::IntrinsicOp::MOP_DxHitObject_MakeMiss: case hlsl::IntrinsicOp::MOP_DxHitObject_MakeNop: + case hlsl::IntrinsicOp::MOP_DxHitObject_TraceRay: DiagnoseReachableSERCall(*this, CE, EntrySK, EntryDecl, false); break; case hlsl::IntrinsicOp::IOP_DxMaybeReorderThread: @@ -12080,34 +12803,73 @@ void Sema::DiagnoseReachableHLSLCall(CallExpr *CE, const hlsl::ShaderModel *SM, ///////////////////////////////////////////////////////////////////////////// -bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, - QualType ArgTy, bool &Empty, - const FieldDecl *FD) { - DXASSERT_NOMSG(!ArgTy.isNull()); +static bool AllowObjectInContext(QualType Ty, TypeDiagContext DiagContext) { + // Disallow all object in template type parameters (former + // err_hlsl_objectintemplateargument) + if (DiagContext == TypeDiagContext::TypeParameter) + return false; + // Disallow all objects in node records (former + // err_hlsl_node_record_object) + if (DiagContext == TypeDiagContext::NodeRecords) + return false; + // TODO: Extend this list for other object types. + if (IsHLSLHitObjectType(Ty)) + return false; + return true; +} - HLSLExternalSource *source = HLSLExternalSource::FromSema(self); - ArTypeObjectKind shapeKind = source->GetTypeObjectKind(ArgTy); - switch (shapeKind) { +// Determine if `Ty` is valid in this `DiagContext` and/or an empty type. If +// invalid returns false and Sema `S`, location `Loc`, error index +// `DiagContext`, and FieldDecl `FD` are used to emit diagnostics. If +// `CheckLongVec` is set, errors are produced if `Ty` is a long vector. If the +// type is not empty, `Empty` is set to false. `CheckedDecls` is used to prevent +// redundant recursive type checks. 
+static bool +DiagnoseElementTypes(Sema &S, SourceLocation Loc, QualType Ty, bool &Empty, + TypeDiagContext ObjDiagContext, + TypeDiagContext LongVecDiagContext, + llvm::SmallPtrSet &CheckedDecls, + const clang::FieldDecl *FD) { + if (Ty.isNull() || Ty->isDependentType()) + return false; + + const bool CheckLongVec = LongVecDiagContext != TypeDiagContext::Valid; + const bool CheckObjects = ObjDiagContext != TypeDiagContext::Valid; + + while (const ArrayType *Arr = Ty->getAsArrayTypeUnsafe()) + Ty = Arr->getElementType(); + + const int ObjDiagContextIdx = static_cast(ObjDiagContext); + const int LongVecDiagContextIdx = static_cast(LongVecDiagContext); + DXASSERT_NOMSG( + LongVecDiagContext == TypeDiagContext::Valid || + (0 <= LongVecDiagContextIdx && + LongVecDiagContextIdx <= + static_cast(TypeDiagContext::LongVecDiagMaxSelectIndex))); + + HLSLExternalSource *Source = HLSLExternalSource::FromSema(&S); + ArTypeObjectKind ShapeKind = Source->GetTypeObjectKind(Ty); + switch (ShapeKind) { case AR_TOBJ_VECTOR: - if (GetHLSLVecSize(ArgTy) > DXIL::kDefaultMaxVectorLength) { - const unsigned NodeRecordsIdx = 3; - self->Diag(ArgLoc.getLocation(), diag::err_hlsl_unsupported_long_vector) - << NodeRecordsIdx; + if (CheckLongVec && GetHLSLVecSize(Ty) > DXIL::kDefaultMaxVectorLength) { + S.Diag(Loc, diag::err_hlsl_unsupported_long_vector) + << LongVecDiagContextIdx; Empty = false; return false; } LLVM_FALLTHROUGH; - case AR_TOBJ_ARRAY: case AR_TOBJ_BASIC: case AR_TOBJ_MATRIX: Empty = false; return false; case AR_TOBJ_OBJECT: Empty = false; - self->Diag(ArgLoc.getLocation(), diag::err_hlsl_node_record_object) - << ArgTy << ArgLoc.getSourceRange(); + if (!CheckObjects || AllowObjectInContext(Ty, ObjDiagContext)) + return false; + S.Diag(Loc, diag::err_hlsl_unsupported_object_context) + << Ty << ObjDiagContextIdx; if (FD) - self->Diag(FD->getLocation(), diag::note_field_declared_here) + S.Diag(FD->getLocation(), diag::note_field_declared_here) << FD->getType() << FD->getSourceRange(); return true; case AR_TOBJ_DEPENDENT: @@ -12116,25 +12878,55 @@ bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, return true; case AR_TOBJ_COMPOUND: { bool ErrorFound = false; - const RecordDecl *RD = ArgTy->getAs()->getDecl(); + const RecordDecl *RD = Ty->getAs()->getDecl(); + // Never recurse redundantly into related subtypes that have already been + // checked. 
+ if (!CheckedDecls.insert(RD).second) + return false; + // Check the fields of the RecordDecl - for (auto *FD : RD->fields()) + for (auto *ElemFD : RD->fields()) { ErrorFound |= - DiagnoseNodeStructArgument(self, ArgLoc, FD->getType(), Empty, FD); - if (RD->isCompleteDefinition()) - if (auto *Child = dyn_cast(RD)) - // Walk up the inheritance chain and check base class fields - for (auto &B : Child->bases()) - ErrorFound |= - DiagnoseNodeStructArgument(self, ArgLoc, B.getType(), Empty); + DiagnoseElementTypes(S, Loc, ElemFD->getType(), Empty, ObjDiagContext, + LongVecDiagContext, CheckedDecls, ElemFD); + } + if (!RD->isCompleteDefinition()) + return ErrorFound; + + if (auto *Child = dyn_cast(RD)) + // Walk up the inheritance chain and check base class fields + for (auto &B : Child->bases()) + ErrorFound |= + DiagnoseElementTypes(S, Loc, B.getType(), Empty, ObjDiagContext, + LongVecDiagContext, CheckedDecls, nullptr); return ErrorFound; } default: - DXASSERT(false, "unreachable"); + // Not a recursive type, no element types to check here + Empty = false; return false; } } +bool hlsl::DiagnoseTypeElements(Sema &S, SourceLocation Loc, QualType Ty, + TypeDiagContext ObjDiagContext, + TypeDiagContext LongVecDiagContext, + const clang::FieldDecl *FD) { + bool Empty = false; + llvm::SmallPtrSet CheckedDecls; + return DiagnoseElementTypes(S, Loc, Ty, Empty, ObjDiagContext, + LongVecDiagContext, CheckedDecls, FD); +} + +bool hlsl::DiagnoseNodeStructArgument(Sema *self, TemplateArgumentLoc ArgLoc, + QualType ArgTy, bool &Empty, + const FieldDecl *FD) { + llvm::SmallPtrSet CheckedDecls; + return DiagnoseElementTypes(*self, ArgLoc.getLocation(), ArgTy, Empty, + TypeDiagContext::NodeRecords, + TypeDiagContext::NodeRecords, CheckedDecls, FD); +} + // This function diagnoses whether or not all entry-point attributes // should exist on this shader stage void DiagnoseEntryAttrAllowedOnStage(clang::Sema *self, @@ -12562,21 +13354,6 @@ bool hlsl::ShouldSkipNRVO(clang::Sema &sema, clang::QualType returnType, return false; } -bool hlsl::ContainsLongVector(QualType QT) { - if (QT.isNull() || QT->isDependentType()) - return false; - - while (const ArrayType *Arr = QT->getAsArrayTypeUnsafe()) - QT = Arr->getElementType(); - - if (CXXRecordDecl *Decl = QT->getAsCXXRecordDecl()) { - if (!Decl->isCompleteDefinition()) - return false; - return Decl->hasHLSLLongVector(); - } - return false; -} - bool hlsl::IsConversionToLessOrEqualElements( clang::Sema *self, const clang::ExprResult &sourceExpr, const clang::QualType &targetType, bool explicitConversion) { @@ -15247,8 +16024,8 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, result = false; } - // Disallow long vecs from $Global cbuffers. - if (isGlobal && !isStatic && !isGroupShared && !IS_BASIC_OBJECT(basicKind)) { + // Disallow intangible HLSL objects in the global scope. + if (isGlobal) { // Suppress actual emitting of errors for incompletable types here // They are redundant to those produced in ActOnUninitializedDecl. 
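For reference, a standalone sketch of the recursive element walk that DiagnoseElementTypes performs above: arrays are peeled to their element type, vectors are checked against the default maximum length, records are scanned field by field, and a visited set keeps recursive record types from being checked twice. The Type struct is a toy model, and the default max vector length of 4 is assumed for the sketch.

#include <cassert>
#include <set>
#include <string>
#include <vector>

static const unsigned kDefaultMaxVectorLength = 4; // assumed for the sketch

struct Type {
  std::string Name;
  unsigned VectorSize = 0;            // > 0 for vectors
  const Type *ArrayElement = nullptr; // non-null for arrays
  std::vector<const Type *> Fields;   // non-empty for records
};

bool ContainsLongVector(const Type *T, std::set<const Type *> &Visited) {
  while (T->ArrayElement)             // peel arrays down to the element type
    T = T->ArrayElement;
  if (T->VectorSize > kDefaultMaxVectorLength)
    return true;
  if (!T->Fields.empty()) {
    if (!Visited.insert(T).second)    // already checked this record
      return false;
    for (const Type *F : T->Fields)   // recurse into record fields
      if (ContainsLongVector(F, Visited))
        return true;
  }
  return false;
}

int main() {
  Type Float8{"vector<float, 8>", 8};
  Type Wrapper{"struct S", 0, nullptr, {&Float8}};
  Type Arr{"S[4]", 0, &Wrapper};
  std::set<const Type *> Visited;
  assert(ContainsLongVector(&Arr, Visited)); // long vector found through array + struct
}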
struct SilentDiagnoser : public TypeDiagnoser { @@ -15256,12 +16033,22 @@ bool Sema::DiagnoseHLSLDecl(Declarator &D, DeclContext *DC, Expr *BitWidth, virtual void diagnose(Sema &S, SourceLocation Loc, QualType T) {} } SD; RequireCompleteType(D.getLocStart(), qt, SD); - if (ContainsLongVector(qt)) { - unsigned CbuffersOrTbuffersIdx = 4; - Diag(D.getLocStart(), diag::err_hlsl_unsupported_long_vector) - << CbuffersOrTbuffersIdx; + + // Disallow objects in the global context + TypeDiagContext ObjDiagContext = TypeDiagContext::CBuffersOrTBuffers; + if (isGroupShared) + ObjDiagContext = TypeDiagContext::GroupShared; + else if (isStatic) + ObjDiagContext = TypeDiagContext::GlobalVariables; + + TypeDiagContext LongVecDiagContext = TypeDiagContext::Valid; + + // Disallow long vecs from $Global cbuffers. + if (!isStatic && !isGroupShared && !IS_BASIC_OBJECT(basicKind)) + LongVecDiagContext = TypeDiagContext::CBuffersOrTBuffers; + if (DiagnoseTypeElements(*this, D.getLocStart(), qt, ObjDiagContext, + LongVecDiagContext)) result = false; - } } // SPIRV change starts @@ -16144,121 +16931,6 @@ QualType Sema::getHLSLDefaultSpecialization(TemplateDecl *Decl) { return QualType(); } -static bool isRelatedDeclMarkedNointerpolation(Expr *E) { - if (!E) - return false; - E = E->IgnoreCasts(); - if (auto *DRE = dyn_cast(E)) - return DRE->getDecl()->hasAttr(); - - if (auto *ME = dyn_cast(E)) - return ME->getMemberDecl()->hasAttr() || - isRelatedDeclMarkedNointerpolation(ME->getBase()); - - if (auto *HVE = dyn_cast(E)) - return isRelatedDeclMarkedNointerpolation(HVE->getBase()); - - if (auto *ASE = dyn_cast(E)) - return isRelatedDeclMarkedNointerpolation(ASE->getBase()); - - return false; -} - -// Verify that user-defined intrinsic struct args contain no long vectors -static bool CheckUDTIntrinsicArg(Sema *S, Expr *Arg) { - if (ContainsLongVector(Arg->getType())) { - const unsigned UserDefinedStructParameterIdx = 5; - S->Diag(Arg->getExprLoc(), diag::err_hlsl_unsupported_long_vector) - << UserDefinedStructParameterIdx; - return true; - } - return false; -} - -static bool CheckIntrinsicGetAttributeAtVertex(Sema *S, FunctionDecl *FDecl, - CallExpr *TheCall) { - assert(TheCall->getNumArgs() > 0); - auto argument = TheCall->getArg(0)->IgnoreCasts(); - - if (!isRelatedDeclMarkedNointerpolation(argument)) { - S->Diag(argument->getExprLoc(), diag::err_hlsl_parameter_requires_attribute) - << 0 << FDecl->getName() << "nointerpolation"; - return true; - } - - return false; -} - -bool Sema::CheckHLSLIntrinsicCall(FunctionDecl *FDecl, CallExpr *TheCall) { - auto attr = FDecl->getAttr(); - - if (!attr) - return false; - - if (!IsBuiltinTable(attr->getGroup())) - return false; - - switch (hlsl::IntrinsicOp(attr->getOpcode())) { - case hlsl::IntrinsicOp::IOP_GetAttributeAtVertex: - // See #hlsl-specs/issues/181. Feature is broken. For SPIR-V we want - // to limit the scope, and fail gracefully in some cases. - if (!getLangOpts().SPIRV) - return false; - // This should never happen for SPIR-V. But on the DXIL side, extension can - // be added by inserting new intrinsics, meaning opcodes can collide with - // existing ones. See the ExtensionTest.EvalAttributeCollision test. 
- assert(FDecl->getName() == "GetAttributeAtVertex"); - return CheckIntrinsicGetAttributeAtVertex(this, FDecl, TheCall); - case hlsl::IntrinsicOp::IOP_DispatchMesh: - assert(TheCall->getNumArgs() > 3); - assert(FDecl->getName() == "DispatchMesh"); - return CheckUDTIntrinsicArg(this, TheCall->getArg(3)->IgnoreCasts()); - case hlsl::IntrinsicOp::IOP_CallShader: - assert(TheCall->getNumArgs() > 1); - assert(FDecl->getName() == "CallShader"); - return CheckUDTIntrinsicArg(this, TheCall->getArg(1)->IgnoreCasts()); - case hlsl::IntrinsicOp::IOP_TraceRay: - assert(TheCall->getNumArgs() > 7); - assert(FDecl->getName() == "TraceRay"); - return CheckUDTIntrinsicArg(this, TheCall->getArg(7)->IgnoreCasts()); - case hlsl::IntrinsicOp::IOP_ReportHit: - assert(TheCall->getNumArgs() > 2); - assert(FDecl->getName() == "ReportHit"); - return CheckUDTIntrinsicArg(this, TheCall->getArg(2)->IgnoreCasts()); - default: - break; - } - - return false; -} - -bool Sema::CheckHLSLFunctionCall(FunctionDecl *FDecl, CallExpr *TheCall) { - if (hlsl::IsIntrinsicOp(FDecl) && CheckHLSLIntrinsicCall(FDecl, TheCall)) - return true; - - // See #hlsl-specs/issues/181. Feature is broken. For SPIR-V we want - // to limit the scope, and fail gracefully in some cases. - if (!getLangOpts().SPIRV) - return false; - - bool error = false; - for (unsigned i = 0; i < FDecl->getNumParams(); i++) { - assert(i < TheCall->getNumArgs()); - - if (!FDecl->getParamDecl(i)->hasAttr()) - continue; - - if (!isRelatedDeclMarkedNointerpolation(TheCall->getArg(i))) { - Diag(TheCall->getArg(i)->getExprLoc(), - diag::err_hlsl_parameter_requires_attribute) - << i << FDecl->getName() << "nointerpolation"; - error = true; - } - } - - return error; -} - namespace hlsl { static bool nodeInputIsCompatible(DXIL::NodeIOKind IOType, @@ -16482,6 +17154,10 @@ void DiagnoseNodeEntry(Sema &S, FunctionDecl *FD, llvm::StringRef StageName, DXIL::ShaderKind shaderKind = ShaderModel::KindFromFullName(StageName); if (shaderKind == DXIL::ShaderKind::Node) { NodeLoc = pAttr->getLocation(); + // SPIR-V node shader support is experimental + if (S.getLangOpts().SPIRV) { + S.Diag(NodeLoc, diag::warn_spirv_node_shaders_experimental); + } } if (NodeLoc.isInvalid()) { return; @@ -16909,18 +17585,15 @@ void DiagnoseEntry(Sema &S, FunctionDecl *FD) { // Would be nice to check for resources here as they crash the compiler now. // See issue #7186. 
for (const auto *param : FD->params()) { - if (ContainsLongVector(param->getType())) { - const unsigned EntryFunctionParametersIdx = 6; - S.Diag(param->getLocation(), diag::err_hlsl_unsupported_long_vector) - << EntryFunctionParametersIdx; - } + const TypeDiagContext DiagContext = + TypeDiagContext::EntryFunctionParameters; + hlsl::DiagnoseTypeElements(S, param->getLocation(), param->getType(), + DiagContext, DiagContext); } - if (ContainsLongVector(FD->getReturnType())) { - const unsigned EntryFunctionReturnIdx = 7; - S.Diag(FD->getLocation(), diag::err_hlsl_unsupported_long_vector) - << EntryFunctionReturnIdx; - } + const TypeDiagContext DiagContext = TypeDiagContext::EntryFunctionReturnType; + DiagnoseTypeElements(S, FD->getLocation(), FD->getReturnType(), DiagContext, + DiagContext); DXIL::ShaderKind Stage = ShaderModel::KindFromFullName(shaderAttr->getStage()); diff --git a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp index abca7cbf86..a3ca955802 100644 --- a/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp +++ b/tools/clang/lib/Sema/SemaHLSLDiagnoseTU.cpp @@ -709,20 +709,18 @@ void hlsl::DiagnoseTranslationUnit(clang::Sema *self) { << hullPatchCount.value(); } } - for (const auto *param : pPatchFnDecl->params()) - if (ContainsLongVector(param->getType())) { - const unsigned PatchConstantFunctionParametersIdx = 8; - self->Diag(param->getLocation(), - diag::err_hlsl_unsupported_long_vector) - << PatchConstantFunctionParametersIdx; - } - - if (ContainsLongVector(pPatchFnDecl->getReturnType())) { - const unsigned PatchConstantFunctionReturnIdx = 9; - self->Diag(pPatchFnDecl->getLocation(), - diag::err_hlsl_unsupported_long_vector) - << PatchConstantFunctionReturnIdx; + for (const auto *param : pPatchFnDecl->params()) { + const TypeDiagContext ParamDiagContext = + TypeDiagContext::PatchConstantFunctionParameters; + DiagnoseTypeElements(*self, param->getLocation(), param->getType(), + ParamDiagContext, ParamDiagContext); } + + const TypeDiagContext ReturnDiagContext = + TypeDiagContext::PatchConstantFunctionReturnType; + DiagnoseTypeElements(*self, pPatchFnDecl->getLocation(), + pPatchFnDecl->getReturnType(), ReturnDiagContext, + ReturnDiagContext); } DXIL::ShaderKind EntrySK = shaderModel->GetKind(); DXIL::NodeLaunchType NodeLaunchTy = DXIL::NodeLaunchType::Invalid; diff --git a/tools/clang/lib/Sema/SemaLookup.cpp b/tools/clang/lib/Sema/SemaLookup.cpp index 98832a8f57..eec8a7fa64 100644 --- a/tools/clang/lib/Sema/SemaLookup.cpp +++ b/tools/clang/lib/Sema/SemaLookup.cpp @@ -55,6 +55,7 @@ using namespace clang; using namespace sema; +// HLSL Note: This set of utilities copied to SemaHLSL.cpp. namespace { class UnqualUsingEntry { const DeclContext *Nominated; @@ -4809,9 +4810,12 @@ void Sema::diagnoseTypo(const TypoCorrection &Correction, NamedDecl *ChosenDecl = Correction.isKeyword() ? nullptr : Correction.getCorrectionDecl(); - if (PrevNote.getDiagID() && ChosenDecl) + // HLSL Change begin: don't put notes on invalid source locations. + if (PrevNote.getDiagID() && ChosenDecl && + !ChosenDecl->getLocation().isInvalid()) Diag(ChosenDecl->getLocation(), PrevNote) << CorrectedQuotedStr << (ErrorRecovery ? 
FixItHint() : FixTypo); + // HLSL Change end } TypoExpr *Sema::createDelayedTypo(std::unique_ptr TCC, @@ -4836,3 +4840,33 @@ const Sema::TypoExprState &Sema::getTypoExprState(TypoExpr *TE) const { void Sema::clearDelayedTypo(TypoExpr *TE) { DelayedTypos.erase(TE); } + +// HLSL Change Begin +void Sema::CollectNamespaceContexts(Scope *S, + SmallVectorImpl &NSs) { + UnqualUsingDirectiveSet UDirs; + + // Add using directives from this context up to the top level. This + // handles cases where the current declaration is in a context that has + // a using directive but might be in a scope chain that doesn't reach + // the using directive (i.e. a using inside a namespace or class + // declaration but the function definition is outside). + DeclContext *Ctx = S->getEntity(); + for (DeclContext *UCtx = Ctx; UCtx; UCtx = UCtx->getParent()) { + if (UCtx->isTransparentContext()) + continue; + + UDirs.visit(UCtx, UCtx); + } + // Find the first namespace or translation-unit scope. + Scope *Innermost = S; + while (Innermost && !isNamespaceOrTranslationUnitScope(Innermost)) + Innermost = Innermost->getParent(); + + UDirs.visitScopeChain(S, Innermost); + UDirs.done(); + + for (auto &UD : UDirs) + NSs.push_back(UD.getNominatedNamespace()); +} +// HLSL Change End diff --git a/tools/clang/lib/Sema/SemaOverload.cpp b/tools/clang/lib/Sema/SemaOverload.cpp index 636eaf0213..274b66646b 100644 --- a/tools/clang/lib/Sema/SemaOverload.cpp +++ b/tools/clang/lib/Sema/SemaOverload.cpp @@ -146,8 +146,8 @@ ImplicitConversionRank clang::GetConversionRank(ImplicitConversionKind Kind) { }; static_assert(_countof(Rank) == ICK_Num_Conversion_Kinds, "Otherwise, GetConversionRank is out of sync with ImplicitConversionKind"); // HLSL Change - assert((int)Kind < (int)ICK_Num_Conversion_Kinds); // HLSL Change - return Rank[(int)Kind]; + assert(Kind < _countof(Rank)); // HLSL Change + return Rank[Kind]; // HLSL Change } /// GetImplicitConversionName - Return the name of this kind of @@ -10627,6 +10627,7 @@ static void AddOverloadedCallCandidate(Sema &S, void Sema::AddOverloadedCallCandidates(UnresolvedLookupExpr *ULE, ArrayRef Args, OverloadCandidateSet &CandidateSet, + Scope *S, // HLSL Change bool PartialOverloading) { #ifndef NDEBUG @@ -10659,8 +10660,8 @@ void Sema::AddOverloadedCallCandidates(UnresolvedLookupExpr *ULE, #endif // HLSL Change - allow ExternalSource the ability to add the overloads for a call. - if (ExternalSource && - ExternalSource->AddOverloadedCallCandidates(ULE, Args, CandidateSet, PartialOverloading)) { + if (ExternalSource && ExternalSource->AddOverloadedCallCandidates( + ULE, Args, CandidateSet, S, PartialOverloading)) { return; } @@ -10970,7 +10971,7 @@ bool Sema::buildOverloadedCallSet(Scope *S, Expr *Fn, // Add the functions denoted by the callee to the set of candidate // functions, including those from argument-dependent lookup. 
- AddOverloadedCallCandidates(ULE, Args, *CandidateSet); + AddOverloadedCallCandidates(ULE, Args, *CandidateSet, S); // HLSL Change if (getLangOpts().MSVCCompat && CurContext->isDependentContext() && !isSFINAEContext() && diff --git a/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp b/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp index 1eacedbb0b..a6ae05faa5 100644 --- a/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp +++ b/tools/clang/lib/Sema/SemaTemplateInstantiate.cpp @@ -2139,18 +2139,6 @@ Sema::InstantiateClass(SourceLocation PointOfInstantiation, SourceLocation(), SourceLocation(), nullptr); CheckCompletedCXXClass(Instantiation); - // HLSL Change Begin - set longvec bit for vectors of over 4 elements - ClassTemplateSpecializationDecl *Spec = - dyn_cast<ClassTemplateSpecializationDecl>(Instantiation); - if (Spec && Spec->hasAttr()) { - const TemplateArgumentList &argList = Spec->getTemplateArgs(); - const TemplateArgument &arg1 = argList[1]; - llvm::APSInt vecSize = arg1.getAsIntegral(); - if (vecSize.getLimitedValue() > hlsl::DXIL::kDefaultMaxVectorLength) - Instantiation->setHasHLSLLongVector(); - } - // HLSL Change End - set longvec bit for vectors of over 4 elements - // Default arguments are parsed, if not instantiated. We can go instantiate // default arg exprs for default constructors if necessary now. ActOnFinishCXXMemberDefaultArgs(Instantiation); diff --git a/tools/clang/lib/Sema/SemaType.cpp b/tools/clang/lib/Sema/SemaType.cpp index ff3b0dbac7..f08ae486b5 100644 --- a/tools/clang/lib/Sema/SemaType.cpp +++ b/tools/clang/lib/Sema/SemaType.cpp @@ -462,7 +462,7 @@ distributeObjCPointerTypeAttrFromDeclarator(TypeProcessingState &state, // objc_gc goes on the innermost pointer to something that's not a // pointer. - unsigned innermost = -1U; + unsigned innermost = std::numeric_limits<unsigned>::max(); bool considerDeclSpec = true; for (unsigned i = 0, e = declarator.getNumTypeObjects(); i != e; ++i) { DeclaratorChunk &chunk = declarator.getTypeObject(i); @@ -501,7 +501,7 @@ distributeObjCPointerTypeAttrFromDeclarator(TypeProcessingState &state, // Otherwise, if we found an appropriate chunk, splice the attribute // into it. 
- if (innermost != -1U) { + if (innermost != std::numeric_limits::max()) { moveAttrFromListToList(attr, declarator.getAttrListRef(), declarator.getTypeObject(innermost).getAttrListRef()); return; diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl index 9f7a487a05..572734d679 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-agg-load-stores.hlsl @@ -1,35 +1,35 @@ -// RUN: %dxc -T vs_6_6 -DETY=float -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=bool -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=uint64_t -DCOLS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=double -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=float1 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=bool1 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=uint64_t1 -DCOLS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=double1 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double1 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI -// RUN: %dxc -T vs_6_6 -DETY=float4 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=bool4 -DCOLS=4 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=uint64_t4 -DCOLS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DETY=double4 -DCOLS=2 %s | FileCheck %s +// RUN: %dxc -T vs_6_6 -DETY=float4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=bool4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=uint64_t4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DETY=double4 -DCOLS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | 
FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI // RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=2 -DROWS=2 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=2 -DROWS=2 %s | FileCheck %s -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT -// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MAT +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=float -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=bool -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=uint64_t -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI +// RUN: %dxc -T vs_6_6 -DATY=Matrix -DETY=double -DCOLS=3 -DROWS=3 %s | FileCheck %s --check-prefixes=CHECK,MULTI // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=float -DCOLS=4 %s | FileCheck %s // RUN: %dxc -T vs_6_6 -DATY=Vector -DETY=bool -DCOLS=4 %s | FileCheck %s @@ -105,27 +105,27 @@ RWStructuredBuffer< TYPE SS > RwStBuf : register(u2); ConsumeStructuredBuffer< TYPE SS > CnStBuf : register(u4); AppendStructuredBuffer< TYPE SS > ApStBuf : register(u5); -TYPE Add(TYPE f1[COLS], TYPE f2[COLS])[COLS] { +TYPE Add(TYPE f1[COLS], TYPE f2[COLS], TYPE f3[COLS], TYPE f4[COLS])[COLS] { TYPE ret[COLS]; for (int i = 0; i < COLS; i++) - ret[i] = f1[i] + f2[i]; + ret[i] = f1[i] + f2[i] + f3[i] + f4[i]; return ret; } template -T Add(T v1, T v2) { return v1 + v2; } +T Add(T v1, T v2, T v3, T v4) { return v1 + v2 + v3 + v4; } -TYPE Add(TYPE f1[COLS], TYPE f2[COLS], TYPE f3[COLS], TYPE f4[COLS])[COLS] { +TYPE Add(TYPE f1[COLS], TYPE f2[COLS], TYPE f3[COLS], TYPE f4[COLS], TYPE f5[COLS], TYPE f6[COLS])[COLS] { TYPE ret[COLS]; for (int i = 0; i < COLS; i++) - ret[i] = f1[i] + f2[i] + f3[i] + f4[i]; + ret[i] = f1[i] + f2[i] + f3[i] + f4[i] + f5[i] + f6[i]; return ret; } template -T Add(T v1, T v2, T v3, T v4) { return v1 + v2 + v3 + v4; } +T Add(T v1, T v2, T v3, T v4, T v5, T v6) { return v1 + v2 + v3 + v4 + v5 + v6; } -void main(uint ix[2] : IX) { +void main(uint ix[3] : IX) { // ByteAddressBuffer Tests // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) @@ -150,26 +150,55 @@ void main(uint ix[2] : IX) { // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] // OFF: [[RIX0:%.*]] = add i32 [[IX0]], [[BOFF:[0-9]+]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[RIX0]] - // MAT: [[IX0p4:%.*]] = add i32 [[RIX0]], [[p4:[0-9]+]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] - // MAT: [[IX0p8:%.*]] = add i32 [[RIX0]], [[p8:[0-9]+]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] + // MULTI: [[IX0p4:%.*]] = add i32 [[RIX0]], [[p4:[0-9]+]] + // MULTI: call 
%dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] + // MULTI: [[IX0p8:%.*]] = add i32 [[RIX0]], [[p8:[0-9]+]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] // I1: icmp ne i32 // I1: icmp ne i32 // I1: icmp ne i32 // I1: icmp ne i32 TYPE babElt1 SS = RwByBuf.Load< TYPE SS >(ix[0]); + // CHECK-DAG: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 {{[0-9]*}}, i32 1 + // CHECK-DAG: [[RIX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 {{[0-9]*}}, i32 1 + // OFF: [[RIX1:%.*]] = add i32 [[IX1]], [[BOFF]] + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[RIX1]] + // MULTI: [[IX1p4:%.*]] = add i32 [[RIX1]], [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1p4]] + // MULTI: [[IX1p8:%.*]] = add i32 [[RIX1]], [[p8]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 + // I1: icmp ne i32 + // I1: icmp ne i32 + // I1: icmp ne i32 + uint status1; + TYPE babElt3 SS = RwByBuf.Load< TYPE SS >(ix[1], status1); + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[RIX0]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE babElt2 SS = RoByBuf.Load< TYPE SS >(ix[0]); + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[RIX1]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1p4]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + uint status2; + TYPE babElt4 SS = RoByBuf.Load< TYPE SS >(ix[1], status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 @@ -177,48 +206,76 @@ void main(uint ix[2] : IX) { // OFF: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 {{%.*}}, i32 undef, float 0.0 // OFF: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 {{%.*}}, i32 undef, double 0.0 
// CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[RIX0]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] - RwByBuf.Store< TYPE SS >(ix[0], Add(babElt1, babElt2)); + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p4]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0p8]] + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 100 + RwByBuf.Store< TYPE SS >(ix[0], Add(babElt1, babElt2, babElt3, babElt4)); + RwByBuf.Store< uint > (100, status1 && status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt1 SS = RwStBuf.Load(ix[0]); - // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt2 SS = RwStBuf[ix[1]]; + // CHECK: [[IX2:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, i32 {{[0-9]*}}, i32 2 + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]], i32 [[BOFF]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]], i32 [[p4]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]], i32 [[p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 
%{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt5 SS = RwStBuf.Load(ix[2], status1); + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt3 SS = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt4 SS = RoStBuf[ix[1]]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]], i32 [[BOFF]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]], i32 [[p4]] + // MULTI: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]], i32 [[p8]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt6 SS = RoStBuf.Load(ix[2], status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 @@ -226,9 +283,13 @@ void main(uint ix[2] : IX) { // OFF: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 0, float 0.0 // OFF: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 16, double 0.0 // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[BOFF]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p4]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] - RwStBuf[ix[0]] = Add(stbElt1, stbElt2, stbElt3, stbElt4); + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 
[[IX0]], i32 [[p4]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]], i32 [[p8]] + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 200 + RwStBuf[ix[0]] = Add(stbElt1, stbElt2, stbElt3, stbElt4, stbElt5, stbElt6); + RwByBuf.Store< uint > (200, status1 && status2); // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] @@ -236,8 +297,8 @@ void main(uint ix[2] : IX) { // OFF: call %dx.types.ResRet.f32 @dx.op.rawBufferLoad.f32(i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 // OFF: call %dx.types.ResRet.f64 @dx.op.rawBufferLoad.f64(i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 16 // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[BOFF]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p4]] - // MAT: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p8]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p4]] + // MULTI: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLCON]], i32 [[CONIX]], i32 [[p8]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -253,7 +314,7 @@ void main(uint ix[2] : IX) { // OFF: call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 0 // OFF: call void @dx.op.rawBufferStore.f64(i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 16 // CHECK: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[BOFF]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p4]] - // MAT: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p8]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p4]] + // MULTI: call void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLAPP]], i32 [[APPIX]], i32 [[p8]] ApStBuf.Append(cnElt); } diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl index 5305ee495b..f71b29e83e 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores-sm69.hlsl @@ -26,7 +26,7 @@ AppendStructuredBuffer > ApStBuf : register(u5); // CHECK-LABEL: define void @main [shader("vertex")] -void main(uint ix[2] : IX) { +void main(uint ix[3] : IX) { // ByteAddressBuffer Tests // CHECK-DAG: [[HDLROBY:%.*]] = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) @@ -45,36 +45,73 @@ void main(uint ix[2] : IX) { // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector babElt1 = RwByBuf.Load< vector >(ix[0]); + // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: [[RESRET:%.*]] = 
call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + uint status1; + vector babElt3 = RwByBuf.Load< vector >(ix[1], status1); + // CHECK: [[ANHDLROBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROBY]] // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX0]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector babElt2 = RoByBuf.Load< vector >(ix[0]); + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + uint status2; + vector babElt4 = RoByBuf.Load< vector >(ix[1], status2); + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< vector >(ix[0], babElt1 + babElt2); + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 100 + RwByBuf.Store< vector >(ix[0], babElt1 + babElt2 + babElt3 + babElt4); + RwByBuf.Store< uint > (100, status1 && status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt1 = RwStBuf.Load(ix[0]); - // CHECK: [[IX1:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt2 = RwStBuf[ix[1]]; + // CHECK: [[IX2:%.*]] = call i32 @dx.op.loadInput.i32(i32 4, + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLRWST]], i32 [[IX2]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt5 = RwStBuf.Load(ix[2], status1); + // CHECK: [[ANHDLROST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROST]] // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX0]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt3 = RoStBuf.Load(ix[0]); + // CHECK: call %dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer vector stbElt4 = RoStBuf[ix[1]]; + // CHECK: [[RESRET:%.*]] = call 
%dx.types.ResRet.[[VTY]] @dx.op.rawBufferVectorLoad.[[VTY]](i32 303, %dx.types.Handle [[ANHDLROST]], i32 [[IX2]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[VTY]] [[RESRET]], 1 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne <[[NUM]] x i32> %{{.*}}, zeroinitializer + vector stbElt6 = RoStBuf.Load(ix[2], status2); + // I1: zext <[[NUM]] x i1> %{{.*}} to <[[NUM]] x i32> // CHECK: all void @dx.op.rawBufferVectorStore.[[VTY]](i32 304, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 200 + RwStBuf[ix[0]] = stbElt1 + stbElt2 + stbElt3 + stbElt4 + stbElt5 + stbElt6; + RwByBuf.Store< uint > (200, status1 && status2); // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl index 8dcf5ead1c..896f442c2c 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/buffer-load-stores.hlsl @@ -87,12 +87,36 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 TYPE babElt2 = RoByBuf.Load< TYPE >(ix0); + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + uint status1 = 0; + TYPE babElt3 = RwByBuf.Load< TYPE >(ix1, status1); + + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROBY]], i32 [[IX1]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + uint status2 = 0; + TYPE babElt4 = RoByBuf.Load< TYPE >(ix1, status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 [[IX0]] - RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2); + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 100 + RwByBuf.Store< TYPE >(ix0, babElt1 + babElt2 + babElt3 + babElt4); + RwByBuf.Store< uint > (100, status1 && status2); // StructuredBuffer Tests // CHECK: [[ANHDLRWST:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWST]] @@ -102,6 +126,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : 
IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt1 = RwStBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -116,6 +141,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt3 = RoStBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX1]] // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 @@ -123,12 +149,34 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 TYPE stbElt4 = RoStBuf[ix1]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLRWST]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt5 = RwStBuf.Load(ix2[0], status1); + + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY]] @dx.op.rawBufferLoad.[[TY]](i32 139, %dx.types.Handle [[ANHDLROST]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE stbElt6 = RoStBuf.Load(ix2[0], status2); + // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // CHECK: all void @dx.op.rawBufferStore.[[TY]](i32 140, %dx.types.Handle [[ANHDLRWST]], i32 [[IX0]] - RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4; + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 200 + RwStBuf[ix0] = stbElt1 + stbElt2 + stbElt3 + stbElt4 + stbElt5 + stbElt6; + RwByBuf.Store< uint > (200, status1 && status2); // {Append/Consume}StructuredBuffer Tests // CHECK: [[ANHDLCON:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLCON]] @@ -167,6 +215,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt1 = RwTyBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -183,6 +232,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt2 = RwTyBuf[ix1]; + // CHECK: [[ANHDLROTY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTY]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX0]] // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -200,6 +250,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, 
uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt3 = RoTyBuf.Load(ix0); + // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX1]] // F64: call double @dx.op.makeDouble.f64(i32 101 // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -217,6 +268,44 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 TYPE typElt4 = RoTyBuf[ix1]; + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY32]] [[RESRET]], 4 + // CHECK: [[CHK1:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt5 = RwTyBuf.Load(ix2[0], status1); + + // CHECK: [[RESRET:%.*]] = call %dx.types.ResRet.[[TY32]] @dx.op.bufferLoad.[[TY32]](i32 68, %dx.types.Handle [[ANHDLROTY]], i32 [[IX20]] + // CHECK: [[STATUS:%.*]] = extractvalue %dx.types.ResRet.[[TY32]] [[RESRET]], 4 + // CHECK: [[CHK2:%.*]] = call i1 @dx.op.checkAccessFullyMapped.i32(i32 71, i32 [[STATUS]]) + // F64: call double @dx.op.makeDouble.f64(i32 101 + // F64: call double @dx.op.makeDouble.f64(i32 101 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I64: zext i32 %{{.*}} to i64 + // I64: zext i32 %{{.*}} to i64 + // I64: shl nuw i64 + // I64: or i64 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + // I1: icmp ne i32 %{{.*}}, 0 + TYPE typElt6 = RoTyBuf.Load(ix2[0], status2); + // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 // F64: call %dx.types.splitdouble @dx.op.splitDouble.f64(i32 102 // I64: trunc i64 %{{.*}} to i32 @@ -229,8 +318,12 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 // I1: zext i1 %{{.*}} to i32 - // CHECK: all void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] - RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + typElt4; + // CHECK: call void @dx.op.bufferStore.[[TY32]](i32 69, %dx.types.Handle [[ANHDLRWTY]], i32 [[IX0]] + // CHECK: and i1 [[CHK1]], [[CHK2]] + // CHECK: [[ANHDLRWBY:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWBY]] + // CHECK: call void @dx.op.rawBufferStore.i32(i32 140, %dx.types.Handle [[ANHDLRWBY]], i32 300 + RwTyBuf[ix0] = typElt1 + typElt2 + typElt3 + typElt4 + typElt5 + typElt6; + RwByBuf.Store< uint > (300, status1 && status2); // Texture Tests // CHECK: [[ANHDLROTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLROTX1]] @@ -250,6 +343,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE texElt1 = RoTex1d[ix0]; + // CHECK: [[ANHDLRWTX1:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX1]] 
// CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX1]], i32 undef, i32 [[IX0]], i32 undef, i32 undef // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -285,6 +379,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE texElt3 = RoTex2d[ix2]; + // CHECK: [[ANHDLRWTX2:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX2]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX2]], i32 undef, i32 [[IX20]], i32 [[IX21]], i32 undef // F64: call double @dx.op.makeDouble.f64(i32 101 @@ -320,6 +415,7 @@ void main(uint ix0 : IX0, uint ix1 : IX1, uint2 ix2 : IX2, uint3 ix3 : IX3) { // I1: icmp ne i32 %{{.*}}, 0 // I1: icmp ne i32 %{{.*}}, 0 TYPE texElt5 = RoTex3d[ix3]; + // CHECK: [[ANHDLRWTX3:%.*]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle [[HDLRWTX3]] // CHECK: call %dx.types.ResRet.[[TY32]] @dx.op.textureLoad.[[TY32]](i32 66, %dx.types.Handle [[ANHDLRWTX3]], i32 undef, i32 [[IX30]], i32 [[IX31]], i32 [[IX32]] // F64: call double @dx.op.makeDouble.f64(i32 101 diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/check-shader-stages.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/check-shader-stages.hlsl new file mode 100644 index 0000000000..75e7c8a5cd --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/check-shader-stages.hlsl @@ -0,0 +1,135 @@ +// RUN: %dxc -T lib_6_9 %s | FileCheck %s + +ByteAddressBuffer matrix_buffer; +ByteAddressBuffer bias_buffer; +RWByteAddressBuffer rw_matrix_buffer; +ByteAddressBuffer input_vector_buffer; +RWByteAddressBuffer output_vector_buffer; + +void UseCoopVec() { + vector output_vector; + static const uint is_output_unsigned = 0; + + vector input_vector = input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = 9; /*F32*/ + + const uint matrix_offset = 0; + const uint matrix_interpretation = 9; /*F32*/ + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = 0; /*RowMajor*/ + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride); + output_vector_buffer.Store(0, output_vector); + + const uint bias_offset = 0; + const uint bias_interpretation = 9; /*F32*/ + + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride, bias_buffer, bias_offset, + bias_interpretation); + output_vector_buffer.Store(1024, output_vector); + + vector input_vector1; + vector input_vector2; + const uint opa_matrix_offset = 0; + const uint opa_matrix_interpretation = 5; /*U32*/ + const uint opa_matrix_layout = 3; /*OuterProductOptimal*/ + const uint opa_matrix_stride = 0; + + __builtin_OuterProductAccumulate(input_vector1, input_vector2, + rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation, + opa_matrix_layout, opa_matrix_stride); + + const uint va_matrix_offset = 0; + + __builtin_VectorAccumulate(input_vector1, 
rw_matrix_buffer, + va_matrix_offset); +} + +// CHECK: define void @ps_main() +// CHECK: call <4 x float> @dx.op.matVecMul +// CHECK: call <4 x float> @dx.op.matVecMulAdd +// CHECK: call void @dx.op.outerProductAccumulate +// CHECK: call void @dx.op.vectorAccumulate + +[Shader("pixel")] +void ps_main() +{ + UseCoopVec(); +} + +// CHECK: define void @cs_main() +// CHECK: call <4 x float> @dx.op.matVecMul +// CHECK: call <4 x float> @dx.op.matVecMulAdd +// CHECK: call void @dx.op.outerProductAccumulate +// CHECK: call void @dx.op.vectorAccumulate + +[Shader("compute")] +[NumThreads(1,1,1)] +void cs_main() +{ + UseCoopVec(); +} + +// CHECK: define void @vs_main() +// CHECK: call <4 x float> @dx.op.matVecMul +// CHECK: call <4 x float> @dx.op.matVecMulAdd +// CHECK: call void @dx.op.outerProductAccumulate +// CHECK: call void @dx.op.vectorAccumulate + +[Shader("vertex")] +void vs_main() +{ + UseCoopVec(); +} + +struct MyRecord{ + uint a; +}; + +// CHECK: define void @ns_main() +// CHECK: call <4 x float> @dx.op.matVecMul +// CHECK: call <4 x float> @dx.op.matVecMulAdd +// CHECK: call void @dx.op.outerProductAccumulate +// CHECK: call void @dx.op.vectorAccumulate + +[Shader("node")] +[NodeLaunch("thread")] +void ns_main(ThreadNodeInputRecord input) +{ + UseCoopVec(); +} + +// Vertex shader output structure +struct VS_OUT { + float3 Color : COLOR0; +}; + +// Geometry shader output structure +struct GS_OUT { + float3 Color : COLOR0; + float2 TexCoord : TEXCOORD0; +}; + +// CHECK: define void @gs_main() +// CHECK: call <4 x float> @dx.op.matVecMul +// CHECK: call <4 x float> @dx.op.matVecMulAdd +// CHECK: call void @dx.op.outerProductAccumulate +// CHECK: call void @dx.op.vectorAccumulate + +[shader("geometry")] +[maxvertexcount(3)] +void gs_main(point VS_OUT input[1], + inout TriangleStream OutputStream) +{ + UseCoopVec(); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/linalg-builtins.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/linalg-builtins.hlsl new file mode 100644 index 0000000000..f1badb9101 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/linalg-builtins.hlsl @@ -0,0 +1,79 @@ +// RUN: %dxc -fcgl -T cs_6_9 -E cs_main %s | FileCheck %s + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer opa_input_buffer; +ByteAddressBuffer matrix_buffer; +ByteAddressBuffer bias_buffer; +RWByteAddressBuffer rw_matrix_buffer; +RWByteAddressBuffer output_vector_buffer; + +[Shader("compute")] +[NumThreads(1,1,1)] +void cs_main() +{ + vector output_vector; + static const uint is_output_unsigned = 0; + + vector input_vector = input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = 9; /*F32*/ + + const uint matrix_offset = 0; + const uint matrix_interpretation = 9; /*F32*/ + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = 0; /*RowMajor*/ + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + // CHECK: %[[MLD0:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A" + // CHECK: %[[MCH0:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %[[MLD0]]) + // CHECK: %[[MAH0:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %[[MCH0]], 
%dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x float>* %{{[^ ]+}}, i1 false, <4 x float> %{{[^ ]+}}, i1 false, i32 9, %dx.types.Handle %[[MAH0]], i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64) + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride); + output_vector_buffer.Store(0, output_vector); + + const uint bias_offset = 0; + const uint bias_interpretation = 9; /*F32*/ + + // CHECK: %[[MLD1:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A" + // CHECK: %[[MCH1:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %[[MLD1]]) + // CHECK: %[[MAH1:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %[[MCH1]], %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer undef) + // CHECK-NEXT: %[[BLD1:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?bias_buffer@@3UByteAddressBuffer@@A" + // CHECK-NEXT: %[[BCH1:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %[[BLD1]]) + // CHECK-NEXT: %[[BAH1:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %[[BCH1]], %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer undef) + // CHECK-NEXT: call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x float>* %{{[^ ]+}}, i1 false, <4 x float> %{{[^ ]+}}, i1 false, i32 9, %dx.types.Handle %[[MAH1]], i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64, %dx.types.Handle %[[BAH1]], i32 0, i32 9) + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride, bias_buffer, bias_offset, + bias_interpretation); + output_vector_buffer.Store(1024, output_vector); + + vector input_vector1 = opa_input_buffer.Load >(0); + vector input_vector2 = opa_input_buffer.Load >(128); + const uint opa_matrix_offset = 0; + const uint opa_matrix_interpretation = 5; /*U32*/ + const uint opa_matrix_layout = 3; /*OuterProductOptimal*/ + const uint opa_matrix_stride = 0; + + // CHECK: %[[MLD2:[^ ]+]] = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A" + // CHECK: %[[MCH2:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %[[MLD2]]) + // CHECK: %[[MAH2:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %[[MCH2]], %dx.types.ResourceProperties { 
i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[MAH2]], i32 0, i32 5, i32 3, i32 0) + __builtin_OuterProductAccumulate(input_vector1, input_vector2, + rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation, + opa_matrix_layout, opa_matrix_stride); + + const uint va_matrix_offset = 0; + + // CHECK: %[[MLD3:[^ ]+]] = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A" + // CHECK: %[[MCH3:[^ ]+]] = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %[[MLD3]]) + // CHECK: %[[MAH3:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %[[MCH3]], %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef) + // CHECK: call void @"dx.hl.op..void (i32, <8 x i32>, %dx.types.Handle, i32)"(i32 393, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[MAH3]], i32 0) + __builtin_VectorAccumulate(input_vector1, rw_matrix_buffer, + va_matrix_offset); +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/lit.local.cfg b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/lit.local.cfg new file mode 100644 index 0000000000..c2417a9e43 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/lit.local.cfg @@ -0,0 +1 @@ +config.unsupported = 'dxil-1-9' not in config.available_features \ No newline at end of file diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul-add_multioverload.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul-add_multioverload.hlsl new file mode 100644 index 0000000000..de811982d6 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul-add_multioverload.hlsl @@ -0,0 +1,122 @@ +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=F16 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DBI=F16 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DBI=F16 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-2 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=2 -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DBI=I32 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-3 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DBI=I32 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-4 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=I8 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-5 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=8 -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 -DBI=I8 -DMST=64 | FileCheck 
%s --check-prefixes COMMON,DXIL-6 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DINUM=8 -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -DBI=I8 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-7 + +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=F16 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DBI=F16 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DBI=F16 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=2 -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DBI=I32 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-3 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DBI=I32 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-4 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -DBI=I8 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-5 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=8 -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 -DBI=I8 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-6 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DINUM=8 -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -DBI=I8 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-7 + + +// COMMON: define void @main() + +// Test minimum support set of combinations for matVecMul +// HLOP-0: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 8, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8) + +// DXIL-0: call <8 x half> @dx.op.matVecMulAdd.v8f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 8, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + +// HLOP-1: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 21, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 0, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8) + +// DXIL-1: call <8 x half> @dx.op.matVecMulAdd.v8f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 21, %dx.types.Handle {{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 0, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false) ; 
MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + +// HLOP-2: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 22, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 0, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8) + +// DXIL-2: call <8 x half> @dx.op.matVecMulAdd.v8f16.v8f16(i32 306, <8 x half> {{[^ ]+}}, i1 false, i32 22, %dx.types.Handle {{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 0, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + +// HLOP-3: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <2 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 false, <2 x i32> %{{[^ ]+}}, i1 true, i32 17, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 0, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 4) + +// DXIL-3: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v2i32(i32 306, <2 x i32> {{[^ ]+}}, i1 true, i32 17, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 0, %dx.types.Handle {{[^ ]+}}, i32 0, i32 4, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + +// HLOP-4: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 false, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 4) + +// DXIL-4: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v8f32(i32 306, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 4, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + +// Test unsigned variations +// HLOP-5: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 true, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20) + +// DXIL-5: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v8f32(i32 306, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i1 true) ; 
MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + +// HLOP-6: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 true, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20) + +// DXIL-6: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v8i32(i32 306, <8 x i32> {{[^ ]+}}, i1 true, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + +// HLOP-7: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <8 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 false, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 0, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20) + +// DXIL-7: call <8 x i32> @dx.op.matVecMulAdd.v8i32.v8i32(i32 306, <8 x i32> {{[^ ]+}}, i1 false, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 0, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i1 false) ; MatVecMulAdd(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,biasBuffer,biasOffset,biasIntepretation,isOutputUnsigned) + + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer matrix_buffer; +ByteAddressBuffer bias_buffer; +RWByteAddressBuffer rw_matrix_buffer; +RWByteAddressBuffer output_vector_buffer; + +enum CompType { + Invalid = 0, + I1 = 1, + I16 = 2, + U16 = 3, + I32 = 4, + U32 = 5, + I64 = 6, + U64 = 7, + F16 = 8, + F32 = 9, + F64 = 10, + SNormF16 = 11, + UNormF16 = 12, + SNormF32 = 13, + UNormF32 = 14, + SNormF64 = 15, + UNormF64 = 16, + PackedS8x32 = 17, + PackedU8x32 = 18, + + // BEGIN NEW FOR SM 6.9 + U8 = 19, + I8 = 20, + F8_E4M3 = 21, + F8_E5M2 = 22, +}; + +enum MatLayout { + RowMajor = 0, + ColumnMajor = 1, + MulOptimal = 2, + OuterProductOptimal = 3, +}; + +[NumThreads(1,1,1)] +void main() +{ + vector output_vector; + static const uint is_output_unsigned = OU; + + vector input_vector = input_vector_buffer.Load >(0); + const uint is_input_unsigned = IU; + const uint input_interpretation = II; + + const uint matrix_offset = 0; + const uint matrix_interpretation = MI; + const uint matrix_dimM = 8; + const uint matrix_dimK = 8; + const uint matrix_layout = ML; + const bool matrix_is_transposed = (bool) MT; + const uint matrix_stride = MST; + + const uint bias_offset = 0; + const uint bias_interpretation = BI; + + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, matrix_interpretation, + matrix_dimM, matrix_dimK, matrix_layout, matrix_is_transposed, matrix_stride, bias_buffer, bias_offset, bias_interpretation); + output_vector_buffer.Store(0, output_vector); +} diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul_multioverload.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul_multioverload.hlsl new file mode 100644 index 0000000000..8b14fb4cf1 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/mat-vec-mul_multioverload.hlsl @@ -0,0 +1,118 @@ +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DMST=0| FileCheck %s --check-prefixes COMMON,DXIL-2 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DII=PackedS8x32 -DINUM=2 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-3 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-4 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=F16 -DINUM=8 -DML=RowMajor -DMT=0 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-5 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=8 -DII=U8 -DMI=I8 -DINUM=8 -DML=ColumnMajor -DMT=0 -DMST=64 | FileCheck %s --check-prefixes COMMON,DXIL-6 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DINUM=8 -DII=U8 -DMI=U8 -DINUM=8 -DML=MulOptimal -DMT=1 -DMST=0 | FileCheck %s --check-prefixes COMMON,DXIL-7 + +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F16 -DMI=F16 -DML=RowMajor -DMT=0 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E4M3 -DMI=F8_E4M3 -DML=MulOptimal -DMT=0 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=float16_t -DIU=0 -DITY=float16_t -DINUM=8 -DII=F8_E5M2 -DMI=F8_E5M2 -DML=MulOptimal -DMT=1 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=2 -DII=PackedS8x32 -DMI=I8 -DML=OuterProductOptimal -DMT=1 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-3 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=I8 -DML=RowMajor -DMT=0 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-4 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=1 -DOTY=uint -DIU=0 -DITY=float -DINUM=8 -DII=I8 -DMI=F16 -DML=RowMajor -DMT=0 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-5 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=1 -DITY=uint -DINUM=8 -DII=U8 -DMI=I8 -DML=ColumnMajor -DMT=0 -DMST=64 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-6 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DOU=0 -DOTY=int -DIU=0 -DITY=int -DINUM=8 -DII=U8 -DMI=U8 -DML=MulOptimal -DMT=1 -DMST=0 -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-7 + +// COMMON: 
define void @main() + +// Test minimum support set of combinations for matVecMul +// HLOP-0: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 8, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64) + +// DXIL-0: call <8 x half> @dx.op.matVecMul.v8f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 8, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + +// HLOP-1: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 21, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 0) + +// DXIL-1: call <8 x half> @dx.op.matVecMul.v8f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 21, %dx.types.Handle {{[^ ]+}}, i32 0, i32 21, i32 8, i32 8, i32 2, i1 false, i32 0, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + +// HLOP-2: call void @"dx.hl.op..void (i32, <8 x half>*, i1, <8 x half>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x half>* %output_vector, i1 false, <8 x half> %{{[^ ]+}}, i1 false, i32 22, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 0) + +// DXIL-2: call <8 x half> @dx.op.matVecMul.v8f16.v8f16(i32 305, <8 x half> {{[^ ]+}}, i1 false, i32 22, %dx.types.Handle {{[^ ]+}}, i32 0, i32 22, i32 8, i32 8, i32 2, i1 true, i32 0, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + +// HLOP-3: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <2 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* %output_vector, i1 false, <2 x i32> %{{[^ ]+}}, i1 true, i32 17, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 0) + +// DXIL-3: call <8 x i32> @dx.op.matVecMul.v8i32.v2i32(i32 305, <2 x i32> {{[^ ]+}}, i1 true, i32 17, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 3, i1 true, i32 0, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + +// HLOP-4: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* %output_vector, i1 false, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64) + +// DXIL-4: call <8 x i32> @dx.op.matVecMul.v8i32.v8f32(i32 305, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 0, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + +// Test unsigned variations +// HLOP-5: 
call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* %output_vector, i1 true, <8 x float> %{{[^ ]+}}, i1 false, i32 20, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64) + +// DXIL-5: call <8 x i32> @dx.op.matVecMul.v8i32.v8f32(i32 305, <8 x float> {{[^ ]+}}, i1 false, i32 20, %dx.types.Handle {{[^ ]+}}, i32 0, i32 8, i32 8, i32 8, i32 0, i1 false, i32 64, i1 true) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + +// HLOP-6: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 true, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64) + +// DXIL-6: call <8 x i32> @dx.op.matVecMul.v8i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 true, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 20, i32 8, i32 8, i32 1, i1 false, i32 64, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + +// HLOP-7: call void @"dx.hl.op..void (i32, <8 x i32>*, i1, <8 x i32>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <8 x i32>* %output_vector, i1 false, <8 x i32> %{{[^ ]+}}, i1 false, i32 19, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 0) + +// DXIL-7: call <8 x i32> @dx.op.matVecMul.v8i32.v8i32(i32 305, <8 x i32> {{[^ ]+}}, i1 false, i32 19, %dx.types.Handle {{[^ ]+}}, i32 0, i32 19, i32 8, i32 8, i32 2, i1 true, i32 0, i1 false) ; MatVecMul(inputVector,isInputUnsigned,inputInterpretation,matrixBuffer,matrixOffset,matrixIntepretation,matrixM,matrixK,matrixLayout,matrixTranspose,matrixStride,isOutputUnsigned) + + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer matrix_buffer; +ByteAddressBuffer bias_buffer; +RWByteAddressBuffer rw_matrix_buffer; +RWByteAddressBuffer output_vector_buffer; + +enum CompType { + Invalid = 0, + I1 = 1, + I16 = 2, + U16 = 3, + I32 = 4, + U32 = 5, + I64 = 6, + U64 = 7, + F16 = 8, + F32 = 9, + F64 = 10, + SNormF16 = 11, + UNormF16 = 12, + SNormF32 = 13, + UNormF32 = 14, + SNormF64 = 15, + UNormF64 = 16, + PackedS8x32 = 17, + PackedU8x32 = 18, + + // BEGIN NEW FOR SM 6.9 + U8 = 19, + I8 = 20, + F8_E4M3 = 21, + F8_E5M2 = 22, +}; + +enum MatLayout { + RowMajor = 0, + ColumnMajor = 1, + MulOptimal = 2, + OuterProductOptimal = 3, +}; + +[NumThreads(1,1,1)] +void main() +{ + vector output_vector; + static const uint is_output_unsigned = OU; + + vector input_vector = input_vector_buffer.Load >(0); + const uint is_input_unsigned = IU; + const uint input_interpretation = II; + + const uint matrix_offset = 0; + const uint matrix_interpretation = MI; + const uint matrix_dimM = 8; + const uint matrix_dimK = 8; + const uint matrix_layout = ML; + const bool matrix_is_transposed = (bool) MT; + const uint matrix_stride = MST; + + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, matrix_interpretation, + matrix_dimM, matrix_dimK, matrix_layout, matrix_is_transposed, matrix_stride); + output_vector_buffer.Store(0, output_vector); +} diff --git 
a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl new file mode 100644 index 0000000000..c53b7d8f21 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/outer-product-accumulate-multioverload.hlsl @@ -0,0 +1,75 @@ +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F16 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F8_E4M3 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=uint -DMI=U8 -DML=OuterProductOptimal | FileCheck %s --check-prefixes COMMON,DXIL-2 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F16 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-0 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=float16_t -DMI=F8_E4M3 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-1 +// RUN: %dxc -T cs_6_9 %s -enable-16bit-types -DITY=uint -DMI=U8 -DML=OuterProductOptimal -fcgl | FileCheck %s --check-prefixes COMMON,HLOP-2 + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer input_vector_buffer2; +RWByteAddressBuffer matrix_buffer; + +// COMMON: define void @main() + +// DXIL-0: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 3, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + +// HLOP-0: call void @"dx.hl.op..void (i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 3, i32 0) + +// DXIL-1: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 3, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + +// HLOP-1: call void @"dx.hl.op..void (i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 21, i32 3, i32 0) + +// DXIL-2: call void @dx.op.outerProductAccumulate.v8i32.v8i32(i32 307, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 3, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + +// HLOP-2: call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 19, i32 3, i32 0) + +enum CompType { + Invalid = 0, + I1 = 1, + I16 = 2, + U16 = 3, + I32 = 4, + U32 = 5, + I64 = 6, + U64 = 7, + F16 = 8, + F32 = 9, + F64 = 10, + SNormF16 = 11, + UNormF16 = 12, + SNormF32 = 13, + UNormF32 = 14, + SNormF64 = 15, + UNormF64 = 16, + PackedS8x32 = 17, + PackedU8x32 = 18, + + // BEGIN NEW FOR SM 6.9 + U8 = 19, + I8 = 20, + F8_E4M3 = 21, + F8_E5M2 = 22, +}; + +enum MatLayout { + RowMajor = 0, + ColumnMajor = 1, + MulOptimal = 2, + OuterProductOptimal = 3, +}; + + +[Numthreads(1,1,1)] +void main() +{ + vector input_vector1 = 
input_vector_buffer.Load >(0); + vector input_vector2 = input_vector_buffer2.Load >(0); + + const uint matrix_interpretation = MI; + const uint matrix_layout = ML; + const uint matrix_offset = 0; + const uint matrix_stride = 0; + + __builtin_OuterProductAccumulate(input_vector1, input_vector2, matrix_buffer, matrix_offset, matrix_interpretation, matrix_layout, matrix_stride); + +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/vector-accumulate.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/vector-accumulate.hlsl new file mode 100644 index 0000000000..dc1bb6c563 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/linalg_builtins/vector-accumulate.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -T cs_6_9 %s | FileCheck %s + +RWByteAddressBuffer matrix_buffer; + +// Test use of __builtin_VectorAccumulate in compute shader +// CHECK: define void @main() +// CHECK: call void @dx.op.vectorAccumulate.v2i32(i32 {{[0-9]+}}, <2 x i32> , %dx.types.Handle {{%[0-9]+}}, i32 0) + +[NumThreads(1,1,1)] +void main() +{ + vector input_vector1 = 5; + const uint matrix_offset = 0; + + __builtin_VectorAccumulate(input_vector1, matrix_buffer, matrix_offset); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder_od.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder_od.hlsl new file mode 100644 index 0000000000..42dff9c52c --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/intrinsics/maybereorder_od.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -T lib_6_9 -E main %s -Od | FileCheck %s --check-prefix DXIL + +// DXIL: %[[HOA:[^ ]+]] = alloca %dx.types.HitObject, align 4 +// DXIL-NEXT: %[[NOP:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() +// DXIL-NEXT: store %dx.types.HitObject %[[NOP]], %dx.types.HitObject* %[[HOA]] +// DXIL-NEXT: %[[LD0:[^ ]+]] = load %dx.types.HitObject, %dx.types.HitObject* %[[HOA]] +// DXIL-NEXT: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[LD0]], i32 undef, i32 0) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) +// DXIL-NEXT: %[[LD1:[^ ]+]] = load %dx.types.HitObject, %dx.types.HitObject* %[[HOA]] +// DXIL-NEXT: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[LD1]], i32 241, i32 3) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) +// DXIL-NEXT: %[[NOP2:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() +// DXIL-NEXT: call void @dx.op.maybeReorderThread(i32 268, %dx.types.HitObject %[[NOP2]], i32 242, i32 7) ; MaybeReorderThread(hitObject,coherenceHint,numCoherenceHintBitsFromLSB) + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + dx::MaybeReorderThread(hit); + dx::MaybeReorderThread(hit, 0xf1, 3); + dx::MaybeReorderThread(0xf2, 7); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-mul.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-mul.hlsl new file mode 100644 index 0000000000..26bcc75da2 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-mul.hlsl @@ -0,0 +1,92 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s | FileCheck %s + +#include + +ByteAddressBuffer Buf; + +export float4 Test1(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // CHECK: %{{.+}} = call <4 x float> @dx.op.matVecMul.v4f32.v4f32(i32 305, <4 x float> %{{.+}}, i1 false, i32 8, %dx.types.Handle %{{.+}}, i32 0, i32 8, i32 4, i32 4, i32 2, i1 true, i32 0, i1 false) 
+ return Mul( + Matrix, MakeInterpretedVector(Input)); +} + +export vector Test2(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // note the stride argument is dropped. + // CHECK: %{{.+}} = call <8 x float> @dx.op.matVecMul.v8f32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 2, i1 false, i32 0, i1 false) + return Mul(Matrix, + MakeInterpretedVector(Input)); +} + +// test that "stride" isn't ignored in non-optimal layouts +export vector Test3(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 6 * 4 * 8}; + + // CHECK: %{{.+}} = call <8 x float> @dx.op.matVecMul.v8f32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 0, i1 false, i32 192, i1 false) + return Mul(Matrix, + MakeInterpretedVector(Input)); +} + +// test that isUnsigned is set correctly for uint16_t +export vector Test4(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 6 * 4 * 8}; + + // CHECK: %{{.+}} = call <8 x i16> @dx.op.matVecMul.v8i16.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 0, i1 false, i32 192, i1 true) + return Mul(Matrix, + MakeInterpretedVector(Input)); + +} + +// test that isUnsigned is set correctly for uint32_t +export vector Test5(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 6 * 4 * 8}; + + // CHECK: %{{.+}} = call <8 x i32> @dx.op.matVecMul.v8i32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 0, i1 false, i32 192, i1 true) + return Mul(Matrix, + MakeInterpretedVector(Input)); + +} + +// test that isUnsigned is set correctly for uint8_t4_packed +export vector Test5(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 6 * 4 * 8}; + + // CHECK: %{{.+}} = call <8 x i32> @dx.op.matVecMul.v8i32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 18, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 0, i1 false, i32 192, i1 true) + return Mul(Matrix, + MakeInterpretedVector(Input)); + +} + +// test that isUnsigned is set correctly for int8_t4_packed +export vector Test5(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 6 * 4 * 8}; + + // CHECK: %{{.+}} = call <8 x i32> @dx.op.matVecMul.v8i32.v6i32(i32 305, <6 x i32> %{{.+}}, i1 true, i32 17, %dx.types.Handle %{{.+}}, i32 0, i32 19, i32 8, i32 24, i32 0, i1 false, i32 192, i1 true) + return Mul(Matrix, + MakeInterpretedVector(Input)); + +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-muladd.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-muladd.hlsl new file mode 100644 index 0000000000..c19e601904 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/mat-vec-muladd.hlsl @@ -0,0 +1,90 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 %s | FileCheck %s + +#include + +ByteAddressBuffer Buf; + +export float4 Test1(float4 input) { + using namespace dx::linalg; + + MatrixRef matrix = {Buf, + 0, 0}; + VectorRef biasVector = {Buf, 256}; + + InterpretedVector theVector = {input}; + + // CHECK: %{{.+}} = call <4 x float> @dx.op.matVecMulAdd.v4f32.v4f32(i32 306, <4 x float> %{{.+}}, i1 false, i32 8, %dx.types.Handle [[RES:%.+]], i32 0, i32 8, i32 4, i32 4, i32 2, i1 false, i32 0, %dx.types.Handle [[RES]], i32 256, i32 8, i1 false) + return MulAdd( + matrix, theVector, + 
biasVector); +} + +export float4 Test2(float4 input) { + using namespace dx::linalg; + + MatrixRef matrix = { + Buf, 0, 0}; + VectorRef biasVector = {Buf, 256}; + + InterpretedVector theVector = {input}; + + // CHECK: %{{.+}} = call <4 x float> @dx.op.matVecMulAdd.v4f32.v4f32(i32 306, <4 x float> %{{.+}}, i1 false, i32 8, %dx.types.Handle [[RES:%.+]], i32 0, i32 8, i32 4, i32 4, i32 2, i1 true, i32 0, %dx.types.Handle [[RES]], i32 256, i32 8, i1 false) + return MulAdd( + matrix, theVector, + biasVector); +} + +export float4 Test3(float4 input) { + using namespace dx::linalg; + + MatrixRef matrix = { + Buf, 0, 0}; + VectorRef biasVector = {Buf, 256}; + + // CHECK: %{{.+}} = call <4 x float> @dx.op.matVecMulAdd.v4f32.v4f32(i32 306, <4 x float> %{{.+}}, i1 false, i32 8, %dx.types.Handle [[RES:%.+]], i32 0, i32 8, i32 4, i32 4, i32 2, i1 true, i32 0, %dx.types.Handle [[RES]], i32 256, i32 8, i1 false) + return MulAdd( + matrix, MakeInterpretedVector(input), + biasVector); +} + +namespace ProposalExample { + +ByteAddressBuffer model; + +vector ApplyNeuralMaterial(vector inputVector) { + using namespace dx::linalg; + + MatrixRef matrix0 = { + model, 0, 0}; + + VectorRef biasVector0 = {model, 1024}; + + MatrixRef matrix1 = + {model, 2048, 0}; + + VectorRef biasVector1 = {model, 3072}; + + MatrixRef matrix2 = { + model, 4096, 0}; + + VectorRef biasVector2 = {model, 5120}; + + vector layer0 = MulAdd( + matrix0, MakeInterpretedVector(inputVector), + biasVector0); + layer0 = max(layer0, 0); + + vector layer1 = MulAdd( + matrix1, MakeInterpretedVector(layer0), + biasVector1); + layer1 = max(layer1, 0); + + vector output = MulAdd( + matrix2, MakeInterpretedVector(layer1), + biasVector2); + output = exp(output); + + return output; +} + +} // namespace ProposalExample diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/outer-product-accumulate-matrix-layout.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/outer-product-accumulate-matrix-layout.hlsl new file mode 100644 index 0000000000..e930557cf9 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/outer-product-accumulate-matrix-layout.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -I %hlsl_headers -T cs_6_9 %s -enable-16bit-types -DML=MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL -DSTRIDE=0 2>&1 | FileCheck %s + +//Source file for the IR in \tools\clang\test\LitDXILValidation\outer-product-accumulate-matrix-layout-failing.ll +//Source file for the IR in \tools\clang\test\LitDXILValidation\outer-product-accumulate-matrix-layout-passing.ll + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer input_vector_buffer2; +RWByteAddressBuffer matrix_buffer; + +#include + +// CHECK: call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %{{[^ ]+}}, <8 x half> %{{[^ ]+}}, %dx.types.Handle %{{[^ ]+}}, i32 0, i32 8, i32 3, i32 0) +using namespace dx::linalg; + +[Numthreads(1,1,1)] +[shader("compute")] +void main() +{ + vector input_vector1 = input_vector_buffer.Load >(0); + vector input_vector2 = input_vector_buffer2.Load >(0); + + const uint matrix_interpretation = DATA_TYPE_FLOAT16; + const uint matrix_layout = ML; + const uint matrix_offset = 0; + const uint matrix_stride = STRIDE; + + __builtin_OuterProductAccumulate(input_vector1, input_vector2, matrix_buffer, matrix_offset, matrix_interpretation, matrix_layout, matrix_stride); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/outerproductaccumulate.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/outerproductaccumulate.hlsl new file mode 100644 index 0000000000..eda15c66f6 --- /dev/null +++ 
b/tools/clang/test/CodeGenDXIL/hlsl/linalg/outerproductaccumulate.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s | FileCheck %s + +#include + +RWByteAddressBuffer RWBuf; + +export void Test4(vector Input1, vector Input2) { + using namespace dx::linalg; + + RWMatrixRef + matrix = {RWBuf, 0, 0}; + + // CHECK: call void @dx.op.outerProductAccumulate.v128f16.v64f16(i32 307, <128 x half> %{{.+}}, <64 x half> %{{.+}}, %dx.types.Handle %{{.+}}, i32 0, i32 8, i32 3, i32 0) + + OuterProductAccumulate(Input1, Input2, matrix); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/linalg/vectoraccumulate.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/linalg/vectoraccumulate.hlsl new file mode 100644 index 0000000000..9157156f10 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/linalg/vectoraccumulate.hlsl @@ -0,0 +1,14 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 %s | FileCheck %s + +#include + +RWByteAddressBuffer RWBuf; + +export void Test5(vector Input) { + using namespace dx::linalg; + + RWBuf.Store >(0, Input); + + // CHECK: call void @dx.op.vectorAccumulate.v128f32(i32 308, <128 x float> %{{.*}}, %dx.types.Handle %{{.*}}, i32 0) + VectorAccumulate(Input, RWBuf, 0); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_accessors.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_accessors.hlsl new file mode 100644 index 0000000000..daeabf9710 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_accessors.hlsl @@ -0,0 +1,113 @@ +// REQUIRES: dxil-1-9 +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL + +// DXIL: %dx.types.HitObject = type { i8* } + +// DXIL: %[[NOP:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() +// DXIL: %[[HIT:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject %[[NOP]], i32 1) ; HitObject_SetShaderTableIndex(hitObject,shaderTableIndex) +// DXIL-DAG: %{{[^ ]+}} = call i1 @dx.op.hitObject_StateScalar.i1(i32 270, %dx.types.HitObject %[[HIT]]) ; HitObject_IsHit(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i1 @dx.op.hitObject_StateScalar.i1(i32 269, %dx.types.HitObject %[[HIT]]) ; HitObject_IsMiss(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i1 @dx.op.hitObject_StateScalar.i1(i32 271, %dx.types.HitObject %[[HIT]]) ; HitObject_IsNop(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 281, %dx.types.HitObject %[[HIT]]) ; HitObject_GeometryIndex(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 285, %dx.types.HitObject %[[HIT]]) ; HitObject_HitKind(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 282, %dx.types.HitObject %[[HIT]]) ; HitObject_InstanceIndex(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 283, %dx.types.HitObject %[[HIT]]) ; HitObject_InstanceID(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 284, %dx.types.HitObject %[[HIT]]) ; HitObject_PrimitiveIndex(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 286, %dx.types.HitObject %[[HIT]]) ; HitObject_ShaderTableIndex(hitObject) +// DXIL-DAG: %{{[^ ]+}} = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject %[[HIT]], i32 40) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject 
%[[HIT]], i32 0) ; HitObject_ObjectRayOrigin(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %[[HIT]], i32 1) ; HitObject_ObjectRayOrigin(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %[[HIT]], i32 2) ; HitObject_ObjectRayOrigin(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %[[HIT]], i32 0) ; HitObject_ObjectRayDirection(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %[[HIT]], i32 1) ; HitObject_ObjectRayDirection(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %[[HIT]], i32 2) ; HitObject_ObjectRayDirection(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %[[HIT]], i32 0) ; HitObject_WorldRayOrigin(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %[[HIT]], i32 1) ; HitObject_WorldRayOrigin(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %[[HIT]], i32 2) ; HitObject_WorldRayOrigin(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %[[HIT]], i32 0) ; HitObject_WorldRayDirection(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %[[HIT]], i32 1) ; HitObject_WorldRayDirection(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %[[HIT]], i32 2) ; HitObject_WorldRayDirection(hitObject,component) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 0, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 0, i32 1) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 0, i32 2) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 0, i32 3) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 1, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 1, i32 1) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 1, i32 2) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 1, i32 3) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 2, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject 
%[[HIT]], i32 2, i32 1) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 2, i32 2) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[HIT]], i32 2, i32 3) ; HitObject_ObjectToWorld3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 0, i32 0) ; HitObject_WorldToObject3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 0, i32 1) ; HitObject_WorldToObject3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 0, i32 2) ; HitObject_WorldToObject3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 0, i32 3) ; HitObject_WorldToObject3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 1, i32 0) ; HitObject_WorldToObject3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 1, i32 1) ; HitObject_WorldToObject3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 1, i32 2) ; HitObject_WorldToObject3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 1, i32 3) ; HitObject_WorldToObject3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 2, i32 0) ; HitObject_WorldToObject3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 2, i32 1) ; HitObject_WorldToObject3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 2, i32 2) ; HitObject_WorldToObject3x4(hitObject,row,col) +// DXIL-DAG: %{{[^ ]+}} = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[HIT]], i32 2, i32 3) ; HitObject_WorldToObject3x4(hitObject,row,col) +// DXIL: ret void + +RWByteAddressBuffer outbuf; + +template +float hashM(in matrix mat) { + float h = 0.f; + for (int i = 0; i < M; ++i) + for (int j = 0; j < N; ++j) + h += mat[i][j]; + return h; +} + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + int isum = 0; + float fsum = 0.0f; + vector vsum = 0; + + ///// Setters + hit.SetShaderTableIndex(1); + + ///// Getters + + // i1 accessors + isum += hit.IsHit(); + isum += hit.IsMiss(); + isum += hit.IsNop(); + + // i32 accessors + isum += hit.GetGeometryIndex(); + isum += hit.GetHitKind(); + isum += hit.GetInstanceIndex(); + isum += hit.GetInstanceID(); + isum += hit.GetPrimitiveIndex(); + isum += hit.GetShaderTableIndex(); + isum += hit.LoadLocalRootTableConstant(40); + + // float3 accessors + vsum += hit.GetWorldRayOrigin(); + vsum += hit.GetWorldRayDirection(); + vsum += hit.GetObjectRayOrigin(); + vsum += hit.GetObjectRayDirection(); + fsum += vsum[0] + vsum[1] + vsum[2]; + + // matrix accessors + fsum += hashM<3, 4>(hit.GetObjectToWorld3x4()); + fsum += hashM<4, 
3>(hit.GetObjectToWorld4x3()); + fsum += hashM<3, 4>(hit.GetWorldToObject3x4()); + fsum += hashM<4, 3>(hit.GetWorldToObject4x3()); + + // f32 accessors + isum += hit.GetRayFlags(); + fsum += hit.GetRayTMin(); + fsum += hit.GetRayTCurrent(); + + outbuf.Store(0, fsum); + outbuf.Store(4, isum); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes.hlsl new file mode 100644 index 0000000000..55ef023a2f --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes.hlsl @@ -0,0 +1,27 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL + +// DXIL: %[[APTR:[^ ]+]] = alloca %struct.CustomAttrs, align 4 +// DXIL: %[[NOP:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() +// DXIL: call void @dx.op.hitObject_Attributes.struct.CustomAttrs(i32 289, %dx.types.HitObject %[[NOP]], %struct.CustomAttrs* nonnull %[[APTR]]) ; HitObject_Attributes(hitObject,attributes) +// DXIL: %[[VPTR:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[APTR]], i32 0, i32 0 +// DXIL: %{{[^ ]+}} = load <4 x float>, <4 x float>* %[[VPTR]], align 4 +// DXIL: %[[IPTR:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[APTR]], i32 0, i32 1 +// DXIL: %{{[^ ]+}} = load i32, i32* %[[IPTR]], align 4 +// DXIL: ret void + +RWByteAddressBuffer outbuf; + +struct +CustomAttrs { + float4 v; + int y; +}; + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + CustomAttrs attrs; + hit.GetAttributes(attrs); + float sum = attrs.v.x + attrs.v.y + attrs.v.z + attrs.v.w + attrs.y; + outbuf.Store(0, sum); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes_builtin.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes_builtin.hlsl new file mode 100644 index 0000000000..59140ab37e --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_attributes_builtin.hlsl @@ -0,0 +1,43 @@ +// RUN: %dxc /Tlib_6_9 %s | FileCheck %s +// RUN: %dxc /Tlib_6_9 -fcgl %s | FileCheck %s -check-prefix=FCGL + +// Make sure that we can use the BuiltInTriangleIntersectionAttributes struct +// as a template argument to GetAttributes. + +// For -fcgl, just check the form of the HL call. 
+// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.BuiltInTriangleIntersectionAttributes*)"(i32 364, %dx.types.HitObject* %{{[^ ]+}}, %struct.BuiltInTriangleIntersectionAttributes* %{{[^ ]+}}) + +// CHECK: %[[ATTR:[^ ]+]] = alloca %struct.BuiltInTriangleIntersectionAttributes +// CHECK: call void @dx.op.hitObject_Attributes.struct.BuiltInTriangleIntersectionAttributes(i32 289, %dx.types.HitObject %{{[^ ]+}}, %struct.BuiltInTriangleIntersectionAttributes* nonnull %[[ATTR]]) + +RaytracingAccelerationStructure Scene : register(t0, space0); +RWTexture2D RenderTarget : register(u0); + +struct [raypayload] RayPayload +{ + float4 color : write(caller, closesthit, miss) : read(caller); +}; + +typedef BuiltInTriangleIntersectionAttributes MyAttribs; + +[shader("raygeneration")] +void MyRaygenShader() +{ + RayDesc ray; + ray.Origin = float3(0,0,0); + ray.Direction = float3(0, 0, 1); + ray.TMin = 0.001; + ray.TMax = 10000.0; + + RayPayload payload = { float4(0, 0, 0, 0) }; + float4 color = float4(1,1,1,1); + + dx::HitObject hit = dx::HitObject::TraceRay(Scene, RAY_FLAG_NONE, ~0, 0, 1, 0, ray, payload); + + MyAttribs attr; + hit.GetAttributes(attr); + payload.color += float4(attr,0,1); + + // Write the raytraced color to the output texture. + RenderTarget[DispatchRaysIndex().xy] = payload.color; +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl new file mode 100644 index 0000000000..33ea2719be --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl @@ -0,0 +1,37 @@ +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL + +// DXIL: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32 263, i32 %[[RQ:[^ ]+]]) ; HitObject_FromRayQuery(rayQueryHandle) +// DXIL: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %[[RQ]], i32 16, %struct.CustomAttrs* nonnull %{{[^ ]+}}) ; HitObject_FromRayQueryWithAttrs(rayQueryHandle,HitKind,CommittedAttribs) + +RaytracingAccelerationStructure RTAS; +RWStructuredBuffer UAV : register(u0); + +RayDesc MakeRayDesc() { + RayDesc desc; + desc.Origin = float3(0, 0, 0); + desc.Direction = float3(1, 0, 0); + desc.TMin = 0.0f; + desc.TMax = 9999.0; + return desc; +} + +struct CustomAttrs { + float x; + float y; +}; + +void Use(in dx::HitObject hit) { + dx::MaybeReorderThread(hit); +} + +[shader("raygeneration")] +void main() { + RayQuery q; + RayDesc ray = MakeRayDesc(); + q.TraceRayInline(RTAS, RAY_FLAG_NONE, 0xFF, ray); + + Use(dx::HitObject::FromRayQuery(q)); + + CustomAttrs attrs = {1.f, 2.f}; + Use(dx::HitObject::FromRayQuery(q, 16, attrs)); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl index 1e947b2296..cc9515d7c1 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_make.hlsl @@ -25,9 +25,9 @@ // AST-NEXT: | | | |-TemplateArgument type 'unsigned int' // AST-NEXT: | | | |-TemplateArgument type 'unsigned int' // AST-NEXT: | | | |-TemplateArgument type 'RayDesc' -// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MakeMiss 'unsigned int' // AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'unsigned int' -// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'RayDesc' +// AST-NEXT: 
| | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> Ray 'RayDesc' // AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 387 // AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl new file mode 100644 index 0000000000..4ea00475f1 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl @@ -0,0 +1,102 @@ +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST +// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL +// RUN: %dxc -T lib_6_9 -E main %s | FileCheck %s --check-prefix DXIL + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> Invoke +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class Tho +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TPayload +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit Invoke 'TResult (Tho, TPayload &) const' static +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> ho 'Tho' +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> Payload 'TPayload &' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used Invoke 'void (dx::HitObject, Payload &)' static +// AST-NEXT: | | | |-TemplateArgument type 'void' +// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject':'dx::HitObject' +// AST-NEXT: | | | |-TemplateArgument type 'Payload' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> ho 'dx::HitObject':'dx::HitObject' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> Payload 'Payload &&__restrict' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 382 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> TraceRay +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TAccelerationStructure +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRayFlags +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TInstanceInclusionMask +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRayContributionToHitGroupIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TMultiplierForGeometryContributionToHitGroupIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TMissShaderIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRay +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TPayload +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit TraceRay 'TResult (TAccelerationStructure, TRayFlags, TInstanceInclusionMask, TRayContributionToHitGroupIndex, TMultiplierForGeometryContributionToHitGroupIndex, TMissShaderIndex, TRay, TPayload &) const' static +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> AccelerationStructure 'TAccelerationStructure' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'TRayFlags' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> InstanceInclusionMask 'TInstanceInclusionMask' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> RayContributionToHitGroupIndex 'TRayContributionToHitGroupIndex' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> MultiplierForGeometryContributionToHitGroupIndex 'TMultiplierForGeometryContributionToHitGroupIndex' +// AST-NEXT: | | | | 
|-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'TMissShaderIndex' +// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> Ray 'TRay' +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> Payload 'TPayload &' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used TraceRay 'dx::HitObject (RaytracingAccelerationStructure, unsigned int, unsigned int, unsigned int, unsigned int, unsigned int, RayDesc, Payload &)' static +// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject' +// AST-NEXT: | | | |-TemplateArgument type 'RaytracingAccelerationStructure' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'RayDesc' +// AST-NEXT: | | | |-TemplateArgument type 'Payload' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> AccelerationStructure 'RaytracingAccelerationStructure' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> RayFlags 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> InstanceInclusionMask 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> RayContributionToHitGroupIndex 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MultiplierForGeometryContributionToHitGroupIndex 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> MissShaderIndex 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> Ray 'RayDesc' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> Payload 'Payload &&__restrict' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 389 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// FCGL: %[[HANDLE:[^ ]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) +// FCGL-NEXT: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32 389, %dx.types.HitObject* %{{[^ ]+}}, %dx.types.Handle %[[HANDLE]], i32 513, i32 1, i32 2, i32 4, i32 0, %struct.RayDesc* %{{[^ ]+}}, %struct.Payload* %{{[^ ]+}}) +// FCGL-NEXT: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %{{[^ ]+}}, %struct.Payload* %{{[^ ]+}}) + +// DXIL: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure +// DXIL: %[[HIT:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %[[RTAS]], i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %{{[^ ]+}}) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) +// DXIL: call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %[[HIT]], %struct.Payload* nonnull %{{[^ ]+}}) ; 
HitObject_Invoke(hitObject,payload) + +// DXIL: !dx.dxrPayloadAnnotations = !{![[MDPLD:[^ ]+]]} +// DXIL: ![[MDPLD]] = !{i32 0, %struct.Payload undef, !{{[^ ]+}}} + +RaytracingAccelerationStructure RTAS; +RWStructuredBuffer UAV : register(u0); + +struct [raypayload] +Payload { + float3 dummy : read(closesthit) : write(caller, anyhit); +}; + +[shader("raygeneration")] +void main() { + RayDesc rayDesc; + rayDesc.Origin = float3(0.0, 1.0, 2.0); + rayDesc.TMin = 3.0f; + rayDesc.Direction = float3(4.0, 5.0, 6.0); + rayDesc.TMax = 7.0f; + + Payload pld; + pld.dummy = float3(7.0, 8.0, 9.0); + + dx::HitObject hit = dx::HitObject::TraceRay( + RTAS, + RAY_FLAG_FORCE_OPAQUE | RAY_FLAG_SKIP_PROCEDURAL_PRIMITIVES, + 1, + 2, + 4, + 0, + rayDesc, + pld); + + dx::HitObject::Invoke(hit, pld); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/objects/RayQuery/tracerayinline.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl similarity index 100% rename from tools/clang/test/HLSLFileCheck/hlsl/objects/RayQuery/tracerayinline.hlsl rename to tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl diff --git a/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl new file mode 100644 index 0000000000..256b6a04e8 --- /dev/null +++ b/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl @@ -0,0 +1,14 @@ +// RUN: %dxc -T vs_6_5 -E main %s | FileCheck %s + +// CHECK-DAG: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.createHandle(i32 57, i8 0, i32 0, i32 0, i1 false) +// CHECK-DAG: %[[RQ:[^ ]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513) +// CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, + +RaytracingAccelerationStructure RTAS; + +RayDesc rayDesc; + +void main() { + RayQuery rayQuery; + rayQuery.TraceRayInline(RTAS, 1, 2, rayDesc); +} diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl index 0b7f0d6b2f..f13772970b 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-intrinsics.hlsl @@ -2,7 +2,6 @@ // RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=7 %s | FileCheck %s // RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=125 %s | FileCheck %s // RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=256 %s | FileCheck %s -// RUN: %dxc -T cs_6_9 -enable-16bit-types -DNUM=1024 %s | FileCheck %s // Test vector-enabled non-trivial intrinsics that take parameters of various types. 
@@ -203,6 +202,36 @@ void main() { // CHECK: fmul fast <[[NUM]] x float> [[tmp]], @dx.op.unary.[[HTY]](i32 23, <[[NUM]] x half> [[hvec2]]) ; Log(value) + // CHECK: [[tmp2:%.*]] = fmul fast <[[NUM]] x half> [[tmp]], [[hvec1]] + // CHECK: call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 21, <[[NUM]] x half> [[tmp2]]) ; Exp(value) + hRes += pow(hVec2, hVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 23, <[[NUM]] x float> [[fvec2]]) ; Log(value) + // CHECK: [[tmp2:%.*]] = fmul fast <[[NUM]] x float> [[tmp]], [[fvec1]] + // CHECK: call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 21, <[[NUM]] x float> [[tmp2]]) ; Exp(value) + fRes += pow(fVec2, fVec1); + + vector hVal; + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x half> @dx.op.unary.[[HTY]](i32 29, <[[NUM]] x half> [[hvec1]]) ; Round_z(value) + // CHECK: fsub fast <[[NUM]] x half> [[hvec1]], [[tmp]] + hRes *= modf(hVec1, hVal); + hRes += hVal; + + vector fVal; + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = call <[[NUM]] x float> @dx.op.unary.[[FTY]](i32 29, <[[NUM]] x float> [[fvec1]]) ; Round_z(value) + // CHECK: fsub fast <[[NUM]] x float> [[fvec1]], [[tmp]] + fRes *= modf(fVec1, fVal); + fRes += fVal; + // CHECK-NOT: extractelement // CHECK-NOT: insertelement // CHECK: [[sub:%.*]] = fsub fast <[[NUM]] x half> [[hvec2]], [[hvec1]] @@ -227,6 +256,25 @@ void main() { // CHECK: fmul fast <[[NUM]] x float> [[mul]], [[sub]] fRes += smoothstep(fVec1, fVec2, fVec3); + // Note that Fabs is tested in longvec-trivial-unary-float-intrinsics. + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = sub <[[NUM]] x i16> zeroinitializer, [[svec1]] + // CHECK: call <[[NUM]] x i16> @dx.op.binary.[[STY]](i32 37, <[[NUM]] x i16> [[svec1]], <[[NUM]] x i16> [[tmp]]) ; IMax(a,b) + sRes += abs(sVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = sub <[[NUM]] x i32> zeroinitializer, [[ivec1]] + // CHECK: call <[[NUM]] x i32> @dx.op.binary.[[ITY]](i32 37, <[[NUM]] x i32> [[ivec1]], <[[NUM]] x i32> [[tmp]]) ; IMax(a,b) + iRes += abs(iVec1); + + // CHECK-NOT: extractelement + // CHECK-NOT: insertelement + // CHECK: [[tmp:%.*]] = sub <[[NUM]] x i64> zeroinitializer, [[lvec1]] + // CHECK: call <[[NUM]] x i64> @dx.op.binary.[[LTY]](i32 37, <[[NUM]] x i64> [[lvec1]], <[[NUM]] x i64> [[tmp]]) ; IMax(a,b) + lRes += abs(lVec1); + // Intrinsics that expand into llvm ops. 
// CHECK-NOT: extractelement diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl index 6ebb511b00..37fb1d2e15 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-scalarized-intrinsics.hlsl @@ -9,6 +9,13 @@ // RUN: %dxc -DFUNC=countbits -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY // RUN: %dxc -DFUNC=firstbithigh -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY // RUN: %dxc -DFUNC=firstbitlow -DARITY=1 -DTYPE=uint -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx_coarse -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddx_fine -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy_coarse -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=ddy_fine -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY +// RUN: %dxc -DFUNC=fwidth -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,UNARY // RUN: %dxc -DFUNC=QuadReadLaneAt -DARITY=4 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD // RUN: %dxc -DFUNC=QuadReadAcrossX -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD // RUN: %dxc -DFUNC=QuadReadAcrossY -DARITY=1 -T ps_6_9 %s | FileCheck %s --check-prefixes=CHECK,QUAD diff --git a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl index 91ab631a7e..9cc3d23b66 100644 --- a/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl +++ b/tools/clang/test/CodeGenDXIL/hlsl/types/longvec-trivial-unary-float-intrinsics.hlsl @@ -1,3 +1,5 @@ +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=abs -DOP=6 -DNUM=7 %s | FileCheck %s +// RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=abs -DOP=6 -DNUM=1022 %s | FileCheck %s // RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=7 %s | FileCheck %s // RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=saturate -DOP=7 -DNUM=1022 %s | FileCheck %s // RUN: %dxc -T cs_6_9 -enable-16bit-types -DFUNC=cos -DOP=12 -DNUM=7 %s | FileCheck %s diff --git a/tools/clang/test/CodeGenSPIRV/amplification_shader_derivative.hlsl b/tools/clang/test/CodeGenSPIRV/amplification_shader_derivative.hlsl new file mode 100644 index 0000000000..9982cf1cda --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/amplification_shader_derivative.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -T as_6_5 -E main -fspv-target-env=vulkan1.3 %s -spirv | FileCheck %s --check-prefix=VK13 +// RUN: %dxc -T as_6_5 -E main -fspv-target-env=vulkan1.1 -Vd %s -spirv | FileCheck %s --check-prefix=VK11 + +// VK13-DAG: OpCapability ComputeDerivativeGroupLinearKHR +// VK13-DAG: OpCapability DerivativeControl +// VK13-DAG: OpCapability MeshShadingEXT +// VK13-DAG: OpExtension "SPV_EXT_mesh_shader" +// VK13-DAG: OpExtension "SPV_KHR_compute_shader_derivatives" +// VK13: OpEntryPoint TaskEXT %main "main" +// VK13: OpExecutionMode %main DerivativeGroupLinearKHR + +// VK11-DAG: OpExtension "SPV_NV_mesh_shader" +// VK11: 
OpEntryPoint TaskNV %main "main" +// VK11-NOT: OpExecutionMode %main DerivativeGroup + +struct AmplificationPayload +{ + float4 value; +}; + +groupshared AmplificationPayload payload; + +[numthreads(4, 1, 1)] +void main(in uint tid : SV_GroupThreadID, in uint gtid : SV_GroupID) +{ + payload.value = ddx_coarse(float4(tid, 0, 0, 0)); + DispatchMesh(1,1,1, payload); +} diff --git a/tools/clang/test/CodeGenSPIRV/bezier.hull.hlsl2spv b/tools/clang/test/CodeGenSPIRV/bezier.hull.hlsl2spv index 30565394b4..1425137c68 100644 --- a/tools/clang/test/CodeGenSPIRV/bezier.hull.hlsl2spv +++ b/tools/clang/test/CodeGenSPIRV/bezier.hull.hlsl2spv @@ -161,7 +161,7 @@ BEZIER_CONTROL_POINT SubDToBezierHS(InputPatch&1 | FileCheck %s + +// CHECK: error: field "gFoo" at register(c5) overlaps with previous members + +uniform float4x4 gMVP : register(c0); +uniform float4 gFoo : register(c5); +uniform float4 gBar : register(c5); + +float4 main(float4 pos : POSITION) : SV_Position { + return mul(gMVP, pos * gFoo + gBar); +} diff --git a/tools/clang/test/CodeGenSPIRV/enum_sizeof.hlsl b/tools/clang/test/CodeGenSPIRV/enum_sizeof.hlsl new file mode 100644 index 0000000000..f596a2db50 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/enum_sizeof.hlsl @@ -0,0 +1,31 @@ +// RUN: %dxc -T cs_6_0 -E main -fcgl %s -spirv | FileCheck %s + +enum E1 : uint64_t +{ + v1 = 0, +}; + +enum E2 : uint32_t +{ + v2 = 0, +}; + +struct S { + E1 e1; + E2 e2; +}; + +RWBuffer b; + +[numthreads(128, 1, 1)] +void main() +{ +// CHECK: OpImageWrite {{%.*}} %uint_0 %int_8 None + b[0] = sizeof(E1); + +// CHECK: OpImageWrite {{%.*}} %uint_1 %int_4 None + b[1] = sizeof(E2); + +// CHECK: OpImageWrite {{%.*}} %uint_2 %int_16 None + b[2] = sizeof(S); +} diff --git a/tools/clang/test/CodeGenSPIRV/fn.export.with.entrypoint.hlsl b/tools/clang/test/CodeGenSPIRV/fn.export.with.entrypoint.hlsl index da25ead9c1..312476b260 100644 --- a/tools/clang/test/CodeGenSPIRV/fn.export.with.entrypoint.hlsl +++ b/tools/clang/test/CodeGenSPIRV/fn.export.with.entrypoint.hlsl @@ -1,4 +1,4 @@ -// RUN: %dxc -T as_6_6 -E main -fspv-target-env=vulkan1.3 -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T as_6_6 -E main -fspv-target-env=universal1.5 -fcgl %s -spirv | FileCheck %s // CHECK: OpCapability Linkage // CHECK: OpDecorate %external_function LinkageAttributes "external_function" Export @@ -10,4 +10,4 @@ export int external_function() { void main() { external_function(); return; -} \ No newline at end of file +} diff --git a/tools/clang/test/CodeGenSPIRV/groupshared.init.warning.hlsl b/tools/clang/test/CodeGenSPIRV/groupshared.init.warning.hlsl new file mode 100644 index 0000000000..c49534948b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/groupshared.init.warning.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -T cs_6_0 -E main -spirv %s 2>&1 | FileCheck %s + +groupshared uint testing = 0; + +[numthreads(64, 1, 1)] +void main(uint local_thread_id_flat : SV_GroupIndex) { + + InterlockedAdd(testing, 1); + GroupMemoryBarrierWithGroupSync(); + + if (local_thread_id_flat == 0) { + if (testing > 64) { + printf("testing is %u wtf", testing); + } + } +} + +// CHECK: warning: Initializer of external global will be ignored +// CHECK-NEXT: groupshared uint testing = 0; \ No newline at end of file diff --git a/tools/clang/test/CodeGenSPIRV/hs.const.output-patch.out.hlsl b/tools/clang/test/CodeGenSPIRV/hs.const.output-patch.out.hlsl index 6bbcdd3764..08669c3de0 100644 --- a/tools/clang/test/CodeGenSPIRV/hs.const.output-patch.out.hlsl +++ b/tools/clang/test/CodeGenSPIRV/hs.const.output-patch.out.hlsl 
@@ -8,13 +8,13 @@ struct ControlPoint { float4 position : POSITION; }; // CHECK: OpFunctionCall %void %HullConst %param_var_edge %param_var_inside %param_var_myFloat // CHECK: [[edges:%[0-9]+]] = OpLoad %_arr_float_uint_3 %param_var_edge // CHECK: [[addr:%[0-9]+]] = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_0 -// CHECK: [[val:%[0-9]+]] = OpCompositeExtract %float %66 0 +// CHECK: [[val:%[0-9]+]] = OpCompositeExtract %float [[arr:%[0-9]+]] 0 // CHECK: OpStore [[addr]] [[val]] // CHECK: [[addr:%[0-9]+]] = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_1 -// CHECK: [[val:%[0-9]+]] = OpCompositeExtract %float %66 1 +// CHECK: [[val:%[0-9]+]] = OpCompositeExtract %float [[arr]] 1 // CHECK: OpStore [[addr]] [[val]] // CHECK: [[addr:%[0-9]+]] = OpAccessChain %_ptr_Output_float %gl_TessLevelOuter %uint_2 -// CHECK: [[val:%[0-9]+]] = OpCompositeExtract %float %66 2 +// CHECK: [[val:%[0-9]+]] = OpCompositeExtract %float [[arr]] 2 // CHECK: OpStore [[addr]] [[val]] // CHECK: [[val:%[0-9]+]] = OpLoad %float %param_var_inside // CHECK: [[addr:%[0-9]+]] = OpAccessChain %_ptr_Output_float %gl_TessLevelInner %uint_0 diff --git a/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.inline.decorate.member.hlsl b/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.inline.decorate.member.hlsl index bb4c2efde1..88a902d326 100644 --- a/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.inline.decorate.member.hlsl +++ b/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.inline.decorate.member.hlsl @@ -4,9 +4,9 @@ template [[vk::ext_instruction(/*spv::OpBitcast*/124)]] T Bitcast(U); -// CHECK: OpMemberDecorate %S 0 Offset 0 -// CHECK: OpMemberDecorate %S 1 Offset 16 -// CHECK: %S = OpTypeStruct %v4float %v4float +// CHECK-DAG: OpMemberDecorate %S 0 Offset 0 +// CHECK-DAG: OpMemberDecorate %S 1 Offset 16 +// CHECK-DAG: %S = OpTypeStruct %v4float %v4float struct S { @@ -14,6 +14,12 @@ struct S [[vk::ext_decorate(/*offset*/ 35, 16)]] float4 f2; }; +// CHECK-DAG: OpDecorateString %out_var_SV_TARGET UserSemantic "raster_order_group_0" +struct PixelOutput +{ + [[vk::location(0), vk::ext_decorate_string(5635, "raster_order_group_0")]] float4 rt0 : SV_TARGET; +}; + using PointerType = vk::SpirvOpaqueType< /* OpTypePointer */ 32, /* PhysicalStorageBuffer */ vk::Literal >, @@ -27,14 +33,16 @@ S Load(PointerType pointer, uint64_t address; -float4 main() : SV_TARGET +PixelOutput main() { // CHECK: [[BC:%[0-9]+]] = OpBitcast %_ptr_PhysicalStorageBuffer_S {{%[0-9]+}} PointerType ptr = Bitcast(address); +PixelOutput output; // CHECK: [[LD:%[0-9]+]] = OpLoad %S [[BC]] Aligned 32 // CHECK: [[RET:%[0-9]+]] = OpCompositeExtract %v4float [[LD]] 0 // CHECK: OpStore %out_var_SV_TARGET [[RET]] - return Load(ptr).f1; +output.rt0 = Load(ptr).f1; + return output; } diff --git a/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.intrinsicExecutionModeId.hlsl b/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.intrinsicExecutionModeId.hlsl index 0d63662ef8..beb0e23a95 100644 --- a/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.intrinsicExecutionModeId.hlsl +++ b/tools/clang/test/CodeGenSPIRV/inline-spirv/spv.intrinsicExecutionModeId.hlsl @@ -2,11 +2,11 @@ // CHECK: OpCapability ShaderClockKHR // CHECK: OpExtension "SPV_KHR_shader_clock" -// CHECK: OpExecutionModeId {{%[a-zA-Z0-9_]+}} LocalSizeId %uint_8 %uint_8 %uint_8 -// CHECK: OpExecutionModeId {{%[a-zA-Z0-9_]+}} LocalSizeHintId %uint_4 %uint_4 %uint_4 +// CHECK: OpExecutionModeId {{%[a-zA-Z0-9_]+}} LocalSizeId %uint_8 %uint_6 %uint_8 +// CHECK: OpExecutionModeId {{%[a-zA-Z0-9_]+}} 
LocalSizeHintId %int_4 %int_4 %int_4 int main() : SV_Target0 { - vk::ext_execution_mode_id(/*LocalSizeId*/38, 8, 8, 8); + vk::ext_execution_mode_id(/*LocalSizeId*/38, 8u, 6u, 8u); [[vk::ext_capability(5055)]] [[vk::ext_extension("SPV_KHR_shader_clock")]] diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.ddx.double.hlsl b/tools/clang/test/CodeGenSPIRV/intrinsics.ddx.double.hlsl new file mode 100644 index 0000000000..a306463466 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/intrinsics.ddx.double.hlsl @@ -0,0 +1,21 @@ +// RUN: %dxc -T ps_6_2 -E main -fcgl %s -spirv 2>&1 | FileCheck %s + +// CHECK: :14:22: warning: conversion from larger type 'double' to smaller type 'float', possible loss of data [-Wconversion] +// CHECK: :20:22: warning: conversion from larger type 'double2' to smaller type 'vector', possible loss of data [-Wconversion] + +void main() { + double a; + double2 b; + +// CHECK: [[a:%[0-9]+]] = OpLoad %double %a +// CHECK-NEXT: [[c:%[0-9]+]] = OpFConvert %float [[a]] +// CHECK-NEXT: [[r:%[0-9]+]] = OpDPdx %float [[c]] +// CHECK-NEXT: OpFConvert %double [[r]] + double da = ddx(a); + +// CHECK: [[b:%[0-9]+]] = OpLoad %v2double %b +// CHECK-NEXT: [[c:%[0-9]+]] = OpFConvert %v2float [[b]] +// CHECK-NEXT: [[r:%[0-9]+]] = OpDPdx %v2float [[c]] +// CHECK-NEXT: OpFConvert %v2double [[r]] + double2 db = ddx(b); +} \ No newline at end of file diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.ddx.half.hlsl b/tools/clang/test/CodeGenSPIRV/intrinsics.ddx.half.hlsl new file mode 100644 index 0000000000..11b63151ee --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/intrinsics.ddx.half.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -T ps_6_2 -E main -enable-16bit-types -fcgl %s -spirv | FileCheck %s + +void main() { + + half a; + half2 b; + +// CHECK: [[a:%[0-9]+]] = OpLoad %half %a +// CHECK-NEXT: [[c:%[0-9]+]] = OpFConvert %float [[a]] +// CHECK-NEXT: [[r:%[0-9]+]] = OpDPdx %float [[c]] +// CHECK-NEXT: OpFConvert %half [[r]] + half da = ddx(a); + +// CHECK: [[b:%[0-9]+]] = OpLoad %v2half %b +// CHECK-NEXT: [[c:%[0-9]+]] = OpFConvert %v2float [[b]] +// CHECK-NEXT: [[r:%[0-9]+]] = OpDPdx %v2float [[c]] +// CHECK-NEXT: OpFConvert %v2half [[r]] + half2 db = ddx(b); +} diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.mul.hlsl b/tools/clang/test/CodeGenSPIRV/intrinsics.mul.hlsl index 4d04896781..629e7527c3 100644 --- a/tools/clang/test/CodeGenSPIRV/intrinsics.mul.hlsl +++ b/tools/clang/test/CodeGenSPIRV/intrinsics.mul.hlsl @@ -1,5 +1,8 @@ // RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s +StructuredBuffer buffer_vec; +StructuredBuffer buffer_mat; + /* According to HLSL reference, mul() has the following versions: @@ -448,6 +451,7 @@ void main() { // mul( Mat(Mx1) * Mat(1xN) ) --> Mat(MxN) matrix float1x3 mat1x3; float3x2 mat3x2; + float3x3 mat3x3; float3x1 mat3x1; float1x4 mat1x4; @@ -474,4 +478,25 @@ void main() { // CHECK-NEXT: [[result3:%[0-9]+]] = OpCompositeConstruct %mat3v4float [[row0]] [[row1]] [[row2]] // CHECK-NEXT: OpStore %result3 [[result3]] float3x4 result3 = mul( mat3x1, mat1x4 ); // result is float3x4 matrix + + float3 v3; + +// CHECK: [[matp:%[0-9]+]] = OpAccessChain %_ptr_Uniform_mat3v3float %buffer_mat %int_0 %int_0 +// CHECK: [[mat:%[0-9]+]] = OpLoad %mat3v3float [[matp]] +// CHECK: [[vec:%[0-9]+]] = OpLoad %v3float %v3 +// CHECK: {{.*}} = OpVectorTimesMatrix %v3float [[vec]] [[mat]] + float3 result4 = mul(buffer_mat.Load(0), v3); + +// CHECK: [[mat:%[0-9]+]] = OpLoad %mat3v3float %mat3x3 +// CHECK: [[vecp:%[0-9]+]] = OpAccessChain %_ptr_Uniform_v3float 
%buffer_vec %int_0 %int_1 +// CHECK: [[vec:%[0-9]+]] = OpLoad %v3float [[vecp]] +// CHECK: {{.*}} = OpVectorTimesMatrix %v3float [[vec]] [[mat]] + float3 result5 = mul(mat3x3, buffer_vec.Load(1)); + +// CHECK: [[matp:%[0-9]+]] = OpAccessChain %_ptr_Uniform_mat3v3float %buffer_mat %int_0 %int_2 +// CHECK: [[mat:%[0-9]+]] = OpLoad %mat3v3float [[matp]] +// CHECK: [[vecp:%[0-9]+]] = OpAccessChain %_ptr_Uniform_v3float %buffer_vec %int_0 %int_2 +// CHECK: [[vec:%[0-9]+]] = OpLoad %v3float [[vecp]] +// CHECK: {{.*}} = OpVectorTimesMatrix %v3float [[vec]] [[mat]] + float3 result6 = mul(buffer_mat.Load(2), buffer_vec.Load(2)); } diff --git a/tools/clang/test/CodeGenSPIRV/intrinsics.vkrawbufferload.hlsl b/tools/clang/test/CodeGenSPIRV/intrinsics.vkrawbufferload.hlsl index 7be0713e48..c2892cfc29 100644 --- a/tools/clang/test/CodeGenSPIRV/intrinsics.vkrawbufferload.hlsl +++ b/tools/clang/test/CodeGenSPIRV/intrinsics.vkrawbufferload.hlsl @@ -12,7 +12,16 @@ struct BufferData { float3 v; }; +using MyInt = vk::SpirvType< + /*spv::OpTypeInt*/21, + 1,1, // size and alignment + vk::Literal >, // bits + vk::Literal > // signed +>; + uint64_t Address; + +[[vk::ext_capability(/* Int16 */ 22)]] float4 main() : SV_Target0 { // CHECK: [[addr:%[0-9]+]] = OpLoad %ulong // CHECK-NEXT: [[buf:%[0-9]+]] = OpBitcast %_ptr_PhysicalStorageBuffer_float [[addr]] @@ -50,5 +59,10 @@ float4 main() : SV_Target0 { // CHECK-NEXT: [[load:%[0-9]+]] = OpLoad %BufferData_0 [[buf]] Aligned 4 d = vk::RawBufferLoad<BufferData>(0); + // CHECK: [[buf:%[0-9]+]] = OpBitcast %_ptr_PhysicalStorageBuffer_spirvIntrinsicType %ulong_0 + // CHECK-NEXT: [[load:%[0-9]+]] = OpLoad %spirvIntrinsicType [[buf]] Aligned 4 + // CHECK-NEXT: OpStore %mi [[load]] + MyInt mi = vk::RawBufferLoad<MyInt>(0); + return float4(w.x, x, y, z); } diff --git a/tools/clang/test/CodeGenSPIRV/linalg/outerproductaccumulate-spirv-errors.hlsl b/tools/clang/test/CodeGenSPIRV/linalg/outerproductaccumulate-spirv-errors.hlsl new file mode 100644 index 0000000000..0213103926 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/linalg/outerproductaccumulate-spirv-errors.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types -spirv %s -verify + +// Tests that the header file cannot be included for spirv compilations +// This is a copy of \tools\clang\test\CodeGenDXIL\hlsl\linalg\outerproductaccumulate.hlsl +// except that spirv is targeted + +// expected-error@dx/linalg.h:4{{Cooperative vectors not (yet) supported for SPIRV}} +#include <dx/linalg.h> + +RWByteAddressBuffer RWBuf; + +export void Test4(vector Input1, vector Input2) { + using namespace dx::linalg; + + RWMatrixRef + matrix = {RWBuf, 0, 0}; + + OuterProductAccumulate(Input1, Input2, matrix); +} diff --git a/tools/clang/test/CodeGenSPIRV/logical_copy.hlsl b/tools/clang/test/CodeGenSPIRV/logical_copy.hlsl new file mode 100644 index 0000000000..eb4a803548 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/logical_copy.hlsl @@ -0,0 +1,67 @@ +// RUN: %dxc %s -fcgl -spirv -T ps_6_8 -fspv-target-env=vulkan1.1spirv1.4 | FileCheck %s + + + +struct WithBool { + bool b; +}; + +struct StructWithBool { + WithBool wb; +}; + +struct StructWithoutBool { + int a; +}; + +struct OuterStruct { + StructWithBool a[2]; + WithBool b; + StructWithoutBool c; + StructWithoutBool d[2]; +} S; + + +// CHECK: %GetStruct = OpFunction %OuterStruct_0 None %34 +// CHECK: %bb_entry_0 = OpLabel +// CHECK: [[ld:%[0-9]+]] = OpLoad %OuterStruct %39 + +// The array `a` must be split up because it contains a bool that needs a +// conversion from int to bool.
+// CHECK: [[arr_with_bool:%[0-9]+]] = OpCompositeExtract %_arr_StructWithBool_uint_2 [[ld]] 0 +// CHECK: [[struct_with_bool:%[0-9]+]] = OpCompositeExtract %StructWithBool [[arr_with_bool]] 0 +// CHECK: [[with_bool:%[0-9]+]] = OpCompositeExtract %WithBool [[struct_with_bool]] 0 +// CHECK: [[int:%[0-9]+]] = OpCompositeExtract %uint [[with_bool]] 0 +// CHECK: [[bool:%[0-9]+]] = OpINotEqual %bool [[int]] %uint_0 +// CHECK: [[with_bool:%[0-9]+]] = OpCompositeConstruct %WithBool_0 [[bool]] +// CHECK: [[struct_with_bool:%[0-9]+]] = OpCompositeConstruct %StructWithBool_0 [[with_bool]] + +// Skip second element of the array. It is more of the same. +// CHECK: [[a:%[0-9]+]] = OpCompositeConstruct %_arr_StructWithBool_0_uint_2 [[struct_with_bool]] {{%.*}} + +// The struct `b` must be split up for the same reason. +// CHECK: [[with_bool:%[0-9]+]] = OpCompositeExtract %WithBool [[ld]] 1 +// CHECK: [[int:%[0-9]+]] = OpCompositeExtract %uint [[with_bool]] 0 +// CHECK: [[bool:%[0-9]+]] = OpINotEqual %bool [[int]] %uint_0 +// CHECK: [[b:%[0-9]+]] = OpCompositeConstruct %WithBool_0 [[bool]] + +// The struct `c` can use OpCopyLogical. +// CHECK: %59 = OpCompositeExtract %StructWithoutBool [[ld]] 2 +// CHECK: [[c:%[0-9]+]] = OpCopyLogical %StructWithoutBool_0 %59 + +// The array `d` can use OpCopyLogical. +// CHECK: %61 = OpCompositeExtract %_arr_StructWithoutBool_uint_2 [[ld]] 3 +// CHECK: [[d:%[0-9]+]] = OpCopyLogical %_arr_StructWithoutBool_0_uint_2 %61 + +// CHECK: [[r:%[0-9]+]] = OpCompositeConstruct %OuterStruct_0 [[a]] [[b]] [[c]] [[d]] +// CHECK: OpStore {{%.*}} [[r]] +// CHECK: OpFunctionEnd + +OuterStruct GetStruct() { return S; } + +uint main() : SV_TARGET +{ + GetStruct(); + return 0; +} + diff --git a/tools/clang/test/CodeGenSPIRV/mesh_shader_derivative.hlsl b/tools/clang/test/CodeGenSPIRV/mesh_shader_derivative.hlsl new file mode 100644 index 0000000000..3f26921e28 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/mesh_shader_derivative.hlsl @@ -0,0 +1,34 @@ +// RUN: %dxc -T ms_6_5 -E main -fspv-target-env=vulkan1.3 %s -spirv | FileCheck %s --check-prefix=VK13 +// RUN: %dxc -T ms_6_5 -E main -fspv-target-env=vulkan1.1 -Vd %s -spirv | FileCheck %s --check-prefix=VK11 + +// VK13-DAG: OpCapability ComputeDerivativeGroupLinearKHR +// VK13-DAG: OpCapability DerivativeControl +// VK13-DAG: OpCapability MeshShadingEXT +// VK13-DAG: OpExtension "SPV_EXT_mesh_shader" +// VK13-DAG: OpExtension "SPV_KHR_compute_shader_derivatives" +// VK13: OpEntryPoint MeshEXT %main "main" +// VK13: OpExecutionMode %main DerivativeGroupLinearKHR + +// VK11-DAG: OpExtension "SPV_NV_mesh_shader" +// VK11: OpEntryPoint MeshNV %main "main" +// VK11-NOT: OpExecutionMode %main DerivativeGroup + +struct VSOut +{ + float4 pos : SV_Position; +}; + +[numthreads(4, 1, 1)] +[outputtopology("triangle")] +void main(in uint tid : SV_GroupThreadID, out vertices VSOut verts[3], out indices uint3 tris[1]) +{ + SetMeshOutputCounts(3, 1); + + float4 val = ddx_coarse(float4(tid, 0, 0, 0)); + + verts[0].pos = val; + verts[1].pos = val + float4(0,1,0,0); + verts[2].pos = val + float4(1,0,0,0); + + tris[0] = uint3(0,1,2); +} diff --git a/tools/clang/test/CodeGenSPIRV/meshshading.ext.amplification.payload.hlsl b/tools/clang/test/CodeGenSPIRV/meshshading.ext.amplification.payload.hlsl new file mode 100644 index 0000000000..c50ef252e9 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/meshshading.ext.amplification.payload.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -E main -T as_6_8 -spirv %s -E main -fspv-target-env=vulkan1.1spirv1.4 | FileCheck %s + 
+struct S { + uint a; +}; + +groupshared S s; +// CHECK: %s = OpVariable {{.*}} TaskPayloadWorkgroupEXT + +[numthreads(1, 1, 1)] +void main() +{ +// CHECK: OpEmitMeshTasksEXT %uint_1 %uint_1 %uint_1 %s + DispatchMesh(1, 1, 1, s); +} diff --git a/tools/clang/test/CodeGenSPIRV/meshshading.ext.cullprimative.hlsl b/tools/clang/test/CodeGenSPIRV/meshshading.ext.cullprimative.hlsl index cb5d7f771f..2a143afab2 100644 --- a/tools/clang/test/CodeGenSPIRV/meshshading.ext.cullprimative.hlsl +++ b/tools/clang/test/CodeGenSPIRV/meshshading.ext.cullprimative.hlsl @@ -1,6 +1,4 @@ // RUN: %dxc -T ms_6_6 -fspv-target-env=vulkan1.1spirv1.4 -E main %s -spirv | FileCheck %s -// XFAIL: * -// FIXME(7160): test disabled until the spirv-val fix is merged. struct MeshletPrimitiveOut { diff --git a/tools/clang/test/CodeGenSPIRV/node.barrier.compute.hlsl b/tools/clang/test/CodeGenSPIRV/node.barrier.compute.hlsl new file mode 100644 index 0000000000..42b18d35a0 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.barrier.compute.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 external %s | FileCheck %s + +// Barrier is called from a compute shader + +[Shader("compute")] +[NumThreads(5,1,1)] +void node116_barrier_compute() +{ + Barrier(1, 3); +} + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[U2:%[^ ]*]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[U72:%[^ ]*]] = OpConstant [[UINT]] 72 +// CHECK: OpControlBarrier [[U2]] [[U2]] [[U72]] diff --git a/tools/clang/test/CodeGenSPIRV/node.barrier.memory-arg.hlsl b/tools/clang/test/CodeGenSPIRV/node.barrier.memory-arg.hlsl new file mode 100644 index 0000000000..9b2dc23eea --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.barrier.memory-arg.hlsl @@ -0,0 +1,60 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 -enable-16bit-types %s | FileCheck %s + +// Barrier is called using a memory type argument + +static const int a = 7; +static const int16_t b = 2; + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(16, 1, 1)] +void node117_barrier_memoryarg() +{ + // literal integer flag values + Barrier(1, 3); + + // static const integer flag values + Barrier(a, b); + + // AllMemoryBarrier() -> + Barrier(UAV_MEMORY|GROUP_SHARED_MEMORY|NODE_INPUT_MEMORY|NODE_OUTPUT_MEMORY, + DEVICE_SCOPE); + + // AllMemoryBarrierWithGroupSync() -> + Barrier(UAV_MEMORY|GROUP_SHARED_MEMORY|NODE_INPUT_MEMORY|NODE_OUTPUT_MEMORY, + GROUP_SYNC|DEVICE_SCOPE); + + // DeviceMemoryBarrier() -> + Barrier(UAV_MEMORY, + DEVICE_SCOPE); + + // DeviceMemoryBarrierWithGroupSync() -> + Barrier(UAV_MEMORY, + GROUP_SYNC|DEVICE_SCOPE); + + // GroupMemoryBarrier() -> + Barrier(GROUP_SHARED_MEMORY, + GROUP_SCOPE); + + // GroupMemoryBarrierWithGroupSync() -> + Barrier(GROUP_SHARED_MEMORY, + GROUP_SYNC|GROUP_SCOPE); +} + + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant %uint 2 +// CHECK-DAG: [[U5:%[^ ]*]] = OpConstant %uint 5 +// CHECK-DAG: [[U72:%[^ ]*]] = OpConstant %uint 72 +// CHECK-DAG: [[U264:%[^ ]*]] = OpConstant %uint 264 +// CHECK-DAG: [[U328:%[^ ]*]] = OpConstant %uint 328 +// CHECK-DAG: [[U4424:%[^ ]*]] = OpConstant %uint 4424 + +// CHECK: OpControlBarrier [[U2]] [[U2]] [[U72]] +// CHECK: OpMemoryBarrier [[U2]] [[U328]] +// CHECK: OpMemoryBarrier [[U5]] [[U4424]] +// CHECK: OpControlBarrier [[U2]] [[U5]] [[U4424]] +// CHECK: OpMemoryBarrier [[U5]] [[U72]] +// CHECK: OpControlBarrier [[U2]] [[U5]] [[U72]] +// CHECK: OpMemoryBarrier [[U2]] [[U264]] +// CHECK: OpControlBarrier [[U2]] [[U2]] [[U264]] diff 
--git a/tools/clang/test/CodeGenSPIRV/node.barrier.object-arg.hlsl b/tools/clang/test/CodeGenSPIRV/node.barrier.object-arg.hlsl new file mode 100644 index 0000000000..215acf7bfd --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.barrier.object-arg.hlsl @@ -0,0 +1,213 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Barrier is called with each node record and UAV type + +struct RECORD +{ + uint value; +}; + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U256:%[^ ]*]] = OpConstant [[UINT]] 256 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U3:%[^ ]*]] = OpConstant [[UINT]] 3 +// CHECK-DAG: [[U4:%[^ ]*]] = OpConstant [[UINT]] 4 +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[U4424:%[^ ]*]] = OpConstant [[UINT]] 4424 +// CHECK-DAG: [[U5:%[^ ]*]] = OpConstant [[UINT]] 5 + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,1)] +[NodeDispatchGrid(256,1,1)] +void node01(DispatchNodeInputRecord input) +{ + Barrier(input, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(256,1,1)] +void node02([MaxRecords(8)] GroupNodeInputRecords input) +{ + Barrier(input, 3); +} + +// CHECK: OpControlBarrier %uint_2 %uint_2 %uint_4424 + +[Shader("node")] +[NodeLaunch("thread")] +void node03(RWThreadNodeInputRecord input) +{ + Barrier(input, 0); +} + +// CHECK: OpMemoryBarrier %uint_4 %uint_4424 + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(256,1,1)] +void node04([MaxRecords(6)] RWGroupNodeInputRecords input) +{ + Barrier(input, 0); +} + +// CHECK: OpMemoryBarrier %uint_4 %uint_4424 + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,1)] +[NodeDispatchGrid(256,1,1)] +void node05([MaxRecords(5)] NodeOutput outputs) +{ + ThreadNodeOutputRecords outrec = outputs.GetThreadNodeOutputRecords(1); + Barrier(outrec, 0); +} + +// CHECK: OpMemoryBarrier %uint_4 %uint_4424 + +[Shader("node")] +[NodeLaunch("thread")] +void node06([MaxRecords(5)] NodeOutput outputs) +{ + ThreadNodeOutputRecords outrec = outputs.GetThreadNodeOutputRecords(3); + Barrier(outrec, 0); +} + +// CHECK: OpMemoryBarrier %uint_4 %uint_4424 + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(256,1,3)] +void node07([MaxRecords(5)] NodeOutput outputs) +{ + GroupNodeOutputRecords outrec = outputs.GetGroupNodeOutputRecords(1); + Barrier(outrec, 3); +} + +// CHECK: OpControlBarrier %uint_2 %uint_2 %uint_4424 + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node08([MaxRecords(5)] NodeOutput outputs) +{ + GroupNodeOutputRecords outrec = outputs.GetGroupNodeOutputRecords(4); + Barrier(outrec, 3); +} + +// CHECK: OpControlBarrier %uint_2 %uint_2 %uint_4424 + +RWBuffer obj09; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node09() +{ + Barrier(obj09, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +RWTexture1D obj10; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node10() +{ + Barrier(obj10, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +RWTexture1DArray obj11; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void 
node11() +{ + Barrier(obj11, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +RWTexture2D obj12; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node12() +{ + Barrier(obj12, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +RWTexture2DArray obj13; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node13() +{ + Barrier(obj13, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +RWTexture3D obj14; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node14() +{ + Barrier(obj14, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +RWStructuredBuffer obj15; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node15() +{ + Barrier(obj15, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +RWByteAddressBuffer obj16; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node16() +{ + Barrier(obj16, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 + +AppendStructuredBuffer obj17; +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(256,1,4)] +[NodeDispatchGrid(256,1,1)] +void node17() +{ + Barrier(obj17, 5); +} + +// CHECK: OpControlBarrier %uint_2 %uint_5 %uint_4424 diff --git a/tools/clang/test/CodeGenSPIRV/node.broadcasting.no-input.hlsl b/tools/clang/test/CodeGenSPIRV/node.broadcasting.no-input.hlsl new file mode 100644 index 0000000000..a3c369b252 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.broadcasting.no-input.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Broadcasting launch node with no input + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(3,4,5)] +[NumThreads(6,7,1)] +[NodeIsProgramEntry] +void node070_broadcasting_noinput() +{ +} + +// CHECK: OpReturn + diff --git a/tools/clang/test/CodeGenSPIRV/node.coalescing.num-threads.hlsl b/tools/clang/test/CodeGenSPIRV/node.coalescing.num-threads.hlsl new file mode 100644 index 0000000000..14e899da02 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.coalescing.num-threads.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 external %s | FileCheck %s + +// Coalescing launch node with thread group defined in the shader + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node008_coalescing_numthreads_shader() +{ +} + +// CHECK: OpEntryPoint GLCompute [[SHADER:%[0-9A-Za-z_]*]] +// CHECK-DAG: OpExecutionMode [[SHADER]] CoalescingAMDX +// CHECK-DAG: OpExecutionMode [[SHADER]] LocalSize 1024 1 1 +// CHECK: OpReturn diff --git a/tools/clang/test/CodeGenSPIRV/node.dispatch-grid.hlsl b/tools/clang/test/CodeGenSPIRV/node.dispatch-grid.hlsl new file mode 100644 index 0000000000..302c8ea698 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.dispatch-grid.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 external %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Broadcasting launch node with dispatch grid defined in shader + +struct INPUT_NOGRID +{ + uint textureIndex; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(2,3,2)] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void 
node001_dispatchgrid_shader(DispatchNodeInputRecord input) +{ +} + +// CHECK: OpEntryPoint GLCompute [[SHADER:%[0-9A-Za-z_]*]] +// CHECK-DAG: OpExecutionMode [[SHADER]] LocalSize 1024 1 1 +// CHECK-DAG: OpExecutionModeId [[SHADER]] StaticNumWorkgroupsAMDX [[U2:%[0-9A-Za-z_]*]] +// CHECK-SAME: [[U3:%[^ ]*]] [[U2]] +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U2]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[U3]] = OpConstant [[UINT]] 3 +// CHECK: OpReturn diff --git a/tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl b/tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl new file mode 100644 index 0000000000..fa16429a1b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.empty-node-input.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Coalescing launch node declares EmptyNodeInput + +RWBuffer buf0; + +[Shader("node")] +[NodeLaunch("coalescing")] +[NodeIsProgramEntry] +[NumThreads(2,1,1)] +void emptynodeinput(EmptyNodeInput input) +{ + // input.Count should always return 1 here, so there is + // an opportunity for an optimization. + buf0[0] = input.Count(); +} + +// CHECK-DAG: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[IMG:%[^ ]*]] = OpTypeImage [[UINT]] Buffer 2 0 0 2 R32ui +// CHECK-DAG: [[IMGPTR:%[^ ]*]] = OpTypePointer UniformConstant [[IMG]] +// CHECK-DAG: [[BUF:%[^ ]*]] = OpVariable [[IMGPTR]] UniformConstant + +// CHECK: [[COUNT:%[^ ]*]] = OpNodePayloadArrayLengthAMDX [[UINT]] +// CHECK: [[IMAGE:%[^ ]*]] = OpLoad [[IMG]] [[BUF]] +// CHECK: OpImageWrite [[IMAGE]] [[U0]] [[COUNT]] None diff --git a/tools/clang/test/CodeGenSPIRV/node.finished-cross-group-sharing.hlsl b/tools/clang/test/CodeGenSPIRV/node.finished-cross-group-sharing.hlsl new file mode 100644 index 0000000000..8e1ce56307 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.finished-cross-group-sharing.hlsl @@ -0,0 +1,32 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// FinishedCrossGroupSharing() is called with RWDispatchNodeInputRecord + +RWBuffer buf0; + +struct [NodeTrackRWInputSharing] INPUT_RECORD +{ + uint value; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1,1,1)] +void node037_finishedcrossgroupsharing(RWDispatchNodeInputRecord input) +{ + bool b = input.FinishedCrossGroupSharing(); + buf0[0] = 0 ? 
b : 1; +} + +// CHECK: OpName [[INPUT:%[^ ]*]] "input" +// CHECK: OpDecorate [[STRUCT:%[^ ]*]] TrackFinishWritingAMDX +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[STRUCT]] = OpTypeStruct [[UINT]] +// CHECK: [[ARR:%[^ ]*]] = OpTypeNodePayloadArrayAMDX [[STRUCT]] +// CHECK: [[PTR:%[^ ]*]] = OpTypePointer NodePayloadAMDX [[ARR]] +// CHECK: [[BOOL:%[^ ]*]] = OpTypeBool +// CHECK: [[INPUT]] = OpFunctionParameter [[PTR]] +// CHECK: OpFinishWritingNodePayloadAMDX [[BOOL]] [[INPUT]] diff --git a/tools/clang/test/CodeGenSPIRV/node.get-input-record-count.hlsl b/tools/clang/test/CodeGenSPIRV/node.get-input-record-count.hlsl new file mode 100644 index 0000000000..a3af668c46 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.get-input-record-count.hlsl @@ -0,0 +1,25 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 external -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// GetInputRecordCount() called with NodeInputRecordArray + +RWBuffer buf0; + +struct INPUT_RECORD +{ + uint textureIndex; +}; + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node014_getinputrecordcount([MaxRecords(256)] GroupNodeInputRecords inputs) +{ + uint numRecords = inputs.Count(); + buf0[0] = numRecords; +} + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: OpNodePayloadArrayLengthAMDX [[UINT]] diff --git a/tools/clang/test/CodeGenSPIRV/node.get-node-output-record.multiple.hlsl b/tools/clang/test/CodeGenSPIRV/node.get-node-output-record.multiple.hlsl new file mode 100644 index 0000000000..d029bd20bb --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.get-node-output-record.multiple.hlsl @@ -0,0 +1,72 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Multiple calls to Get*NodeOuputRecords(array) + +struct RECORD { + int i; + float3 foo; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(64, 1, 1)] +[NodeDispatchGrid(8, 1, 1)] +void node150_a(NodeOutput output) +{ + GroupNodeOutputRecords outRec1 = output.GetGroupNodeOutputRecords(1); + GroupNodeOutputRecords outRec2 = output.GetGroupNodeOutputRecords(4); + outRec1.OutputComplete(); + outRec2.OutputComplete(); +} + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(64, 1, 1)] +[NodeDispatchGrid(8, 1, 1)] +void node150_b(NodeOutput output) +{ + ThreadNodeOutputRecords outRec1 = output.GetThreadNodeOutputRecords(5); + ThreadNodeOutputRecords outRec2 = output.GetThreadNodeOutputRecords(1); + outRec1.OutputComplete(); + outRec1 = outRec2; + outRec1.OutputComplete(); +} + +// CHECK: OpDecorateId [[ARR_A:%[^ ]*]] PayloadNodeNameAMDX [[STR:%[0-9A-Za-z_]*]] +// CHECK: OpDecorateId [[ARR_B:%[^ ]*]] PayloadNodeNameAMDX [[STR]] + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[U4:%[^ ]*]] = OpConstant [[UINT]] 4 +// CHECK-DAG: [[U5:%[^ ]*]] = OpConstant [[UINT]] 5 +// CHECK-DAG: [[STR]] = OpConstantStringAMDX "output" +// CHECK-DAG: [[ARR_A]] = OpTypeNodePayloadArrayAMDX +// CHECK-DAG: [[ARR_B]] = OpTypeNodePayloadArrayAMDX +// CHECK-DAG: [[FPTR_A:%[^ ]*]] = OpTypePointer Function [[ARR_A]] +// CHECK-DAG: [[NPTR_A:%[^ ]*]] = OpTypePointer NodePayloadAMDX [[ARR_A]] +// CHECK-DAG: [[FPTR_B:%[^ ]*]] = OpTypePointer Function [[ARR_B]] +// CHECK-DAG: [[NPTR_B:%[^ ]*]] = OpTypePointer 
NodePayloadAMDX [[ARR_B]] + +// checking for OpFunctionCall skips over the entry function wrapper and +// thereby avoids matching wrapper variables +// CHECK: OpFunctionCall +// CHECK: [[OUT1:%[^ ]*]] = OpVariable [[FPTR_A]] +// CHECK: [[OUT2:%[^ ]*]] = OpVariable [[FPTR_A]] +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX [[NPTR_A]] [[U2]] [[U1]] [[U0]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[ARR_A]] [[PAY]] +// CHECK: OpStore [[OUT1]] [[VAL]] +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX [[NPTR_A]] [[U2]] [[U4]] [[U0]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[ARR_A]] [[PAY]] +// CHECK: OpStore [[OUT2]] [[VAL]] +// CHECK: OpFunctionCall +// CHECK: [[OUT1:%[^ ]*]] = OpVariable [[FPTR_B]] +// CHECK: [[OUT2:%[^ ]*]] = OpVariable [[FPTR_B]] +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX [[NPTR_B]] [[U4]] [[U5]] [[U0]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[ARR_B]] [[PAY]] +// CHECK: OpStore [[OUT1]] [[VAL]] +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX [[NPTR_B]] [[U4]] [[U1]] [[U0]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[ARR_B]] [[PAY]] +// CHECK: OpStore [[OUT2]] [[VAL]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/node.get-remaining-recursion-levels.hlsl b/tools/clang/test/CodeGenSPIRV/node.get-remaining-recursion-levels.hlsl new file mode 100644 index 0000000000..f981282748 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.get-remaining-recursion-levels.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -spirv -T lib_6_8 external -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// GetRemainingRecursionLevels() called + +RWBuffer buf0; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeDispatchGrid(32,2,2)] +[NodeMaxRecursionDepth(16)] +void node133_getremainingrecursionlevels() +{ + uint remaining = GetRemainingRecursionLevels(); + // Use resource as a way of preventing DCE + buf0[0] = remaining; +} + +// CHECK: OpEntryPoint GLCompute [[SHADER:%[^ ]*]] "node133_getremainingrecursionlevels" [[RRL:%[^ ]*]] +// CHECK: OpExecutionModeId [[SHADER]] MaxNodeRecursionAMDX [[U16:%[^ ]*]] +// CHECK: OpDecorate [[RRL]] BuiltIn RemainingRecursionLevelsAMDX +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[U16]] = OpConstant [[UINT]] 16 +// CHECK: [[PTR:%[^ ]*]] = OpTypePointer Input [[UINT]] +// CHECK: [[RRL]] = OpVariable [[PTR]] Input +// CHECK: OpLoad [[UINT]] [[RRL]] diff --git a/tools/clang/test/CodeGenSPIRV/node.group-shared.barrier.hlsl b/tools/clang/test/CodeGenSPIRV/node.group-shared.barrier.hlsl new file mode 100644 index 0000000000..cf1638d75c --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.group-shared.barrier.hlsl @@ -0,0 +1,18 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Check that a barrier can be used on a groupshared object from a +// work graph node + +groupshared uint Test; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void firstNode() +{ + Test = 1; + AllMemoryBarrierWithGroupSync(); +} + +// CHECK: OpReturn diff --git a/tools/clang/test/CodeGenSPIRV/node.group-shared.hlsl b/tools/clang/test/CodeGenSPIRV/node.group-shared.hlsl new file mode 100644 index 0000000000..81fc0e39a2 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.group-shared.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Check that group shared memory is allowed from a work graph 
node + +struct Record +{ + uint index; +}; + +groupshared uint testLds[512]; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(1,1,1)] +void firstNode(DispatchNodeInputRecord inputData) +{ + testLds[inputData.Get().index] = 99; +} + +// CHECK: OpReturn + diff --git a/tools/clang/test/CodeGenSPIRV/node.increment-output-count.group.hlsl b/tools/clang/test/CodeGenSPIRV/node.increment-output-count.group.hlsl new file mode 100644 index 0000000000..d6a2ea759e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.increment-output-count.group.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 external -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Node with EmptyNodeOutput calls GroupIncrementOutputCount + + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(1024,1,1)] +[NodeIsProgramEntry] +void node028_incrementoutputcount([MaxRecords(32)] EmptyNodeOutput empty) +{ + empty.GroupIncrementOutputCount(1); +} + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[STRUCT:%[^ ]*]] = OpTypeStruct +// CHECK-DAG: [[ARR:%[^ ]*]] = OpTypeNodePayloadArrayAMDX [[STRUCT]] +// CHECK-DAG: [[PTR:%[^ ]*]] = OpTypePointer NodePayloadAMDX [[ARR]] +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant [[UINT]] 2 +// CHECK: OpAllocateNodePayloadsAMDX [[PTR]] [[U2]] [[U1]] [[U0]] diff --git a/tools/clang/test/CodeGenSPIRV/node.increment-output-count.thread.hlsl b/tools/clang/test/CodeGenSPIRV/node.increment-output-count.thread.hlsl new file mode 100644 index 0000000000..6cd984fe69 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.increment-output-count.thread.hlsl @@ -0,0 +1,22 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 external -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Node with EmptyNodeOutput calls ThreadIncrementOutputCount + + +[Shader("node")] +[NodeLaunch("thread")] +[NodeIsProgramEntry] +void node028_incrementoutputcount([MaxRecords(32)] EmptyNodeOutput empty) +{ + empty.ThreadIncrementOutputCount(1); +} + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[STRUCT:%[^ ]*]] = OpTypeStruct +// CHECK-DAG: [[ARR:%[^ ]*]] = OpTypeNodePayloadArrayAMDX [[STRUCT]] +// CHECK-DAG: [[PTR:%[^ ]*]] = OpTypePointer NodePayloadAMDX [[ARR]] +// CHECK-DAG: OpConstantStringAMDX "empty" +// CHECK-DAG: [[U4:%[^ ]*]] = OpConstant [[UINT]] 4 +// CHECK: OpAllocateNodePayloadsAMDX [[PTR]] [[U4]] [[U1]] [[U0]] diff --git a/tools/clang/test/CodeGenSPIRV/node.input-record.dispatch-grid.array.hlsl b/tools/clang/test/CodeGenSPIRV/node.input-record.dispatch-grid.array.hlsl new file mode 100644 index 0000000000..bae3f759b8 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.input-record.dispatch-grid.array.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Check that SV_DispatchGrid supports array + +struct RECORD +{ + uint a[3] : SV_DispatchGrid; + uint b[3]; +}; + +[Shader("node")] +[NodeLaunch("coalescing")] +[numthreads(4,4,4)] +void node01(RWGroupNodeInputRecords input) +{ + input.Get().a = input.Get().b; +} + +// CHECK: OpName [[RECORD:%[^ ]*]] "RECORD" +// CHECK: OpMemberDecorate [[RECORD]] 0 PayloadDispatchIndirectAMDX +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[U3:%[^ ]*]] = 
OpConstant %uint 3 +// CHECK: [[ARRAY:%[^ ]*]] = OpTypeArray [[UINT]] [[U3]] +// CHECK: [[RECORD]] = OpTypeStruct [[ARRAY]] [[ARRAY]] diff --git a/tools/clang/test/CodeGenSPIRV/node.input-record.dispatch-grid.nested.hlsl b/tools/clang/test/CodeGenSPIRV/node.input-record.dispatch-grid.nested.hlsl new file mode 100644 index 0000000000..aee7e0d014 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.input-record.dispatch-grid.nested.hlsl @@ -0,0 +1,32 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Check that SV_DispatchGrid in nested struct is recognized + +struct INNER { + uint c; + uint3 grid : SV_DispatchGrid; +}; + +struct RECORD +{ + uint a; + INNER b; +}; + +[Shader("node")] +[NodeLaunch("coalescing")] +[numthreads(4,4,4)] +void node01(RWGroupNodeInputRecords input) +{ + input.Get().a = input.Get().b.grid.x; +} + +// CHECK: OpName [[RECORD:%[^ ]*]] "RECORD" +// CHECK: OpName [[INNER:%[^ ]*]] "INNER" +// CHECK: OpMemberDecorate [[INNER]] 1 PayloadDispatchIndirectAMDX +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[VECTOR:%[^ ]*]] = OpTypeVector %uint 3 +// CHECK: [[INNER]] = OpTypeStruct [[UINT]] [[VECTOR]] +// CHECK: [[RECORD]] = OpTypeStruct [[UINT]] [[INNER]] diff --git a/tools/clang/test/CodeGenSPIRV/node.max-dispatch-grid.hlsl b/tools/clang/test/CodeGenSPIRV/node.max-dispatch-grid.hlsl new file mode 100644 index 0000000000..e2440a31c0 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.max-dispatch-grid.hlsl @@ -0,0 +1,30 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Broadcasting launch node with dispatch grid defined in input +// and max dispatch grid defined in the shader + +struct INPUT_GRID +{ + uint3 DispatchGrid : SV_DispatchGrid; + uint textureIndex; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeMaxDispatchGrid(2,3,4)] +[NumThreads(1024,1,1)] +void node002_dispatchgrid_input_maxdispatchgrid_shader(DispatchNodeInputRecord input) +{ +} + +// CHECK: OpEntryPoint GLCompute [[SHADER:%[^ ]*]] "node002_dispatchgrid_input_maxdispatchgrid_shader" +// CHECK-DAG: OpExecutionMode [[SHADER]] LocalSize 1024 1 1 +// CHECK-DAG: OpExecutionModeId [[SHADER]] MaxNumWorkgroupsAMDX [[U2:%[^ ]*]] [[U3:%[^ ]*]] [[U4:%[0-9A-Za-z_]*]] +// CHECK: OpMemberDecorate %{{[^ ]*}} 0 PayloadDispatchIndirectAMDX +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U2]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[U3]] = OpConstant [[UINT]] 3 +// CHECK-DAG: [[U4]] = OpConstant [[UINT]] 4 +// CHECK: OpReturn diff --git a/tools/clang/test/CodeGenSPIRV/node.max-records.hlsl b/tools/clang/test/CodeGenSPIRV/node.max-records.hlsl new file mode 100644 index 0000000000..7d8449afab --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.max-records.hlsl @@ -0,0 +1,45 @@ +// RUN: %dxc -spirv -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Test referencing params with MaxOutputRecordsSharedWith + +struct rec0 +{ + int i0; + float f0; +}; + +struct rec1 +{ + float f1; + int i1; +}; + +[Shader("node")] +[NodeLaunch("thread")] +void BackwardRef( + RWThreadNodeInputRecord InputyMcInputFace, + [MaxRecords(5)] NodeOutput Output1, + [MaxRecordsSharedWith(Output1)] NodeOutput Output2) +{ +} + +// CHECK: OpDecorateId [[TYPE1:%[^ ]*]] PayloadNodeNameAMDX [[STR1:%[^ ]*]] +// CHECK: OpDecorateId 
[[TYPE1]] NodeMaxPayloadsAMDX [[U5:%[^ ]*]] +// CHECK: OpDecorateId [[TYPE2:%[^ ]*]] PayloadNodeNameAMDX [[STR2:%[^ ]*]] +// CHECK: OpDecorateId [[TYPE2]] NodeSharesPayloadLimitsWithAMDX [[TYPE1]] +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U5]] = OpConstant [[UINT]] 5 +// CHECK-DAG: [[STR1]] = OpConstantStringAMDX "Output1" +// CHECK-DAG: [[STR2]] = OpConstantStringAMDX "Output2" + +#if 0 +// copied from DXIL test but doesn't seem to conform to spec +[Shader("node")] +[NodeLaunch("thread")] +void ForwardRef( + RWThreadNodeInputRecord InputyMcInputFace, + [MaxRecordsSharedWith(Output2)] NodeOutput Output1, + [MaxRecords(5)] NodeOutput Output2) +{ +} +#endif diff --git a/tools/clang/test/CodeGenSPIRV/node.member.read.hlsl b/tools/clang/test/CodeGenSPIRV/node.member.read.hlsl new file mode 100644 index 0000000000..ac2474b29b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.member.read.hlsl @@ -0,0 +1,150 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Read access to members of node input/output records + +RWBuffer buf0; + +struct RECORD +{ + uint a; + uint b; + uint c; +}; + +// CHECK: OpName [[BUF0:%[^ ]*]] "buf0" +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK: [[U16:%[^ ]*]] = OpConstant [[UINT]] 16 +// CHECK-DAG: [[INT:%[^ ]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[^ ]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[S1:%[^ ]*]] = OpConstant [[INT]] 1 +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[S2:%[^ ]*]] = OpConstant [[INT]] 2 +// CHECK-DAG: [[U4:%[^ ]*]] = OpConstant [[UINT]] 4 +// CHECK-DAG: [[U7:%[^ ]*]] = OpConstant [[UINT]] 7 +// CHECK-DAG: [[TBI:%[^ ]*]] = OpTypeImage [[UINT]] Buffer + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(16,1,1)] +void node01(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().a; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S0]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[UINT]] [[PTR]] +// CHECK: [[IMG:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[IMG]] [[U0]] [[VAL]] +// CHECK: OpFunctionEnd + + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(16,1,1)] +void node02(RWDispatchNodeInputRecord input) +{ + buf0[0] = input.Get().b; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S1]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[UINT]] [[PTR]] +// CHECK: [[IMG:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[IMG]] [[U0]] [[VAL]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024, 1, 1)] +[NodeLaunch("coalescing")] +void node03([MaxRecords(3)] GroupNodeInputRecords input) +{ + buf0[0] = input[1].c; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[U1]] [[S2]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad [[UINT]] [[PTR]] +// CHECK: [[IMG:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[IMG]] [[U0]] [[VAL]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("coalescing")] +void node04([MaxRecords(4)] RWGroupNodeInputRecords input) +{ + buf0[0] = input[2].c; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[U2]] [[S2]] +// CHECK: [[VAL:%[^ ]*]] = 
OpLoad [[UINT]] [[PTR]] +// CHECK: [[IMG:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[IMG]] [[U0]] [[VAL]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("coalescing")] +void node05(NodeOutput output) +{ + ThreadNodeOutputRecords outrec = output.GetThreadNodeOutputRecords(1); + buf0[0] = outrec.Get().a; +} + +// CHECK: OpFunction +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U4]] [[U1]] [[U0]] +// CHECK: [[TEMP:%[^ ]*]] = OpLoad %{{[^ ]*}} [[PAY]] +// CHECK: OpStore [[OUT:%[^ ]*]] [[TEMP]] +// CHECK: [[PTR1:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUT]] [[U0]] +// CHECK: [[PTR2:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[PTR1]] [[S0]] +// CHECK-DAG: [[VAL:%[^ ]*]] = OpLoad [[UINT]] [[PTR2]] +// CHECK-DAG: [[IMG:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[IMG]] [[U0]] [[VAL]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("coalescing")] +void node06(NodeOutput output) +{ + ThreadNodeOutputRecords outrec = output.GetThreadNodeOutputRecords(7); + buf0[0] = outrec[2].b; +} + +// CHECK: OpFunction +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U4]] [[U7]] [[U0]] +// CHECK: [[TEMP:%[^ ]*]] = OpLoad %{{[^ ]*}} [[PAY]] +// CHECK: OpStore [[OUT:%[^ ]*]] [[TEMP]] +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUT]] [[U2]] [[S1]] +// CHECK-DAG: [[VAL:%[^ ]*]] = OpLoad [[UINT]] [[PTR]] +// CHECK-DAG: [[IMG:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[IMG]] [[U0]] [[VAL]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("coalescing")] +void node07(NodeOutput output) +{ + GroupNodeOutputRecords outrec = output.GetGroupNodeOutputRecords(1); + buf0[0] = outrec.Get().c; +} + +// CHECK: OpFunction +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U2]] [[U1]] [[U0]] +// CHECK: [[TEMP:%[^ ]*]] = OpLoad %{{[^ ]*}} [[PAY]] +// CHECK: OpStore [[OUT:%[^ ]*]] [[TEMP]] +// CHECK: [[PTR1:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUT]] [[U0]] +// CHECK: [[PTR2:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[PTR1]] [[S2]] +// CHECK-DAG: [[VAL:%[^ ]*]] = OpLoad [[UINT]] [[PTR2]] +// CHECK-DAG: [[IMG:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[IMG]] [[U0]] [[VAL]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/node.member.read.types.hlsl b/tools/clang/test/CodeGenSPIRV/node.member.read.types.hlsl new file mode 100644 index 0000000000..5f7d434bd2 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.member.read.types.hlsl @@ -0,0 +1,193 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 -enable-16bit-types %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Read access of members of input/output record with different type +// sizes - we check the function specializations generated + +RWBuffer buf0; + +struct RECORD +{ + half h; + float f; + double d; + bool b; + uint16_t i16; + int i; + int64_t i64; + uint64_t u64; +}; + +// CHECK: OpName [[BUF0:%[^ ]*]] "buf0" +// CHECK-DAG: OpName [[RECORD:%[^ ]*]] "RECORD" +// CHECK-DAG: OpMemberName [[RECORD]] 0 "h" +// CHECK-DAG: OpMemberName [[RECORD]] 1 "f" +// CHECK-DAG: OpMemberName [[RECORD]] 2 "d" +// CHECK-DAG: OpMemberName [[RECORD]] 3 "b" +// CHECK-DAG: OpMemberName [[RECORD]] 4 "i16" +// CHECK-DAG: OpMemberName [[RECORD]] 5 "i" +// CHECK-DAG: OpMemberName [[RECORD]] 6 "i64" +// CHECK-DAG: OpMemberName [[RECORD]] 7 "u64" + +// CHECK-DAG: [[UINT:%[^ ]*]] = 
OpTypeInt 32 0 +// CHECK-DAG: [[INT:%[^ ]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[S0:%[^ ]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[S1:%[^ ]*]] = OpConstant [[INT]] 1 +// CHECK-DAG: [[S2:%[^ ]*]] = OpConstant [[INT]] 2 +// CHECK-DAG: [[S3:%[^ ]*]] = OpConstant [[INT]] 3 +// CHECK-DAG: [[S4:%[^ ]*]] = OpConstant [[INT]] 4 +// CHECK-DAG: [[S5:%[^ ]*]] = OpConstant [[INT]] 5 +// CHECK-DAG: [[S6:%[^ ]*]] = OpConstant [[INT]] 6 +// CHECK-DAG: [[S7:%[^ ]*]] = OpConstant [[INT]] 7 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[TBI:%[^ ]*]] = OpTypeImage [[UINT]] Buffer + +// CHECK-DAG: [[HALF:%[^ ]*]] = OpTypeFloat 16 +// CHECK-DAG: [[FLOAT:%[^ ]*]] = OpTypeFloat 32 +// CHECK-DAG: [[DOUBLE:%[^ ]*]] = OpTypeFloat 64 +// CHECK-DAG: [[USHORT:%[^ ]*]] = OpTypeInt 16 0 +// CHECK-DAG: [[LONG:%[^ ]*]] = OpTypeInt 64 1 +// CHECK-DAG: [[ULONG:%[^ ]*]] = OpTypeInt 64 0 +// CHECK: [[RECORD]] = OpTypeStruct [[HALF]] [[FLOAT]] [[DOUBLE]] [[UINT]] [[USHORT]] [[INT]] [[LONG]] [[ULONG]] +// CHECK: [[BOOL:%[^ ]*]] = OpTypeBool + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node01(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().h; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S0]] +// CHECK: [[VAL0:%[^ ]*]] = OpLoad [[HALF]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpConvertFToU [[UINT]] [[VAL0]] +// CHECK: [[VAL2:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL2]] [[U0]] [[VAL1]] None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node02(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().f; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S1]] +// CHECK: [[VAL0:%[^ ]*]] = OpLoad [[FLOAT]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpConvertFToU [[UINT]] [[VAL0]] +// CHECK: [[VAL2:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL2]] [[U0]] [[VAL1]] None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node03(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().d; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S2]] +// CHECK: [[VAL0:%[^ ]*]] = OpLoad [[DOUBLE]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpConvertFToU [[UINT]] [[VAL0]] +// CHECK: [[VAL2:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL2]] [[U0]] [[VAL1]] None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node04(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().b; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S3]] +// CHECK: [[VAL0:%[^ ]*]] = OpLoad [[UINT]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpINotEqual [[BOOL]] [[VAL0]] [[U0]] +// CHECK: [[VAL2:%[^ ]*]] = OpSelect [[UINT]] [[VAL1]] [[U1]] [[U0]] +// CHECK: [[VAL3:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL3]] [[U0]] [[VAL2]] None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node05(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().i16; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S4]] +// CHECK: [[VAL0:%[^ ]*]] 
= OpLoad [[USHORT]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpUConvert [[UINT]] [[VAL0]] +// CHECK: [[VAL2:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL2]] [[U0]] [[VAL1]] None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node06(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().i; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S5]] +// CHECK: [[VAL0:%[^ ]*]] = OpLoad [[INT]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpBitcast [[UINT]] [[VAL0]] +// CHECK: [[VAL2:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL2]] [[U0]] [[VAL1]] None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node07(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().i64; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S6]] +// CHECK: [[VAL0:%[^ ]*]] = OpLoad [[LONG]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpSConvert [[INT]] [[VAL0]] +// CHECK: [[VAL2:%[^ ]*]] = OpBitcast [[UINT]] [[VAL1]] +// CHECK: [[VAL3:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL3]] [[U0]] [[VAL2]] None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node08(DispatchNodeInputRecord input) +{ + buf0[0] = input.Get().u64; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S7]] +// CHECK: [[VAL0:%[^ ]*]] = OpLoad [[ULONG]] [[PTR]] +// CHECK: [[VAL1:%[^ ]*]] = OpUConvert [[UINT]] [[VAL0]] +// CHECK: [[VAL2:%[^ ]*]] = OpLoad [[TBI]] [[BUF0]] +// CHECK: OpImageWrite [[VAL2]] [[U0]] [[VAL1]] None +// CHECK: OpFunctionEnd + diff --git a/tools/clang/test/CodeGenSPIRV/node.member.write.hlsl b/tools/clang/test/CodeGenSPIRV/node.member.write.hlsl new file mode 100644 index 0000000000..33fc2dd9ff --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.member.write.hlsl @@ -0,0 +1,88 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments + +// Writes to members of the various read-write node records + +struct RECORD +{ + uint a; + uint b; +}; + +// CHECK-DAG: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[INT:%[^ ]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[S0:%[^ ]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[S1:%[^ ]*]] = OpConstant [[INT]] 1 +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[U4:%[^ ]*]] = OpConstant [[UINT]] 4 +// CHECK-DAG: [[U5:%[^ ]*]] = OpConstant [[UINT]] 5 +// CHECK-DAG: [[U7:%[^ ]*]] = OpConstant [[UINT]] 7 +// CHECK-DAG: [[U8:%[^ ]*]] = OpConstant [[UINT]] 8 +// CHECK-DAG: [[U9:%[^ ]*]] = OpConstant [[UINT]] 9 +// CHECK-DAG: [[U11:%[^ ]*]] = OpConstant [[UINT]] 11 + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node01(RWDispatchNodeInputRecord input1) +{ + input1.Get().a = 5; +} + +// CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[S0]] +// CHECK: OpStore [[PTR]] [[U5]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(2,1,1)] +[NodeLaunch("coalescing")] +void node02([MaxRecords(4)] RWGroupNodeInputRecords input2) +{ + input2[1].b = 7; +} + +// 
CHECK: OpFunction +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} %{{[^ ]*}} [[U1]] [[S1]] +// CHECK: OpStore [[PTR]] [[U7]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(3,1,1)] +[NodeLaunch("coalescing")] +void node03(NodeOutput output) +{ + ThreadNodeOutputRecords output3 = output.GetThreadNodeOutputRecords(2); + output3.Get().b = 9; +} + +// CHECK: OpFunction +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U4]] [[U2]] [[U0]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad %{{[^ ]*}} [[PAY]] +// CHECK: OpStore [[OUT:%[^ ]*]] [[VAL]] +// CHECK: [[PTR0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUT]] [[U0]] +// CHECK: [[PTR1:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[PTR0]] [[S1]] +// CHECK: OpStore [[PTR1]] [[U9]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NumThreads(4,1,1)] +[NodeLaunch("coalescing")] +void node04(NodeOutput output) +{ + GroupNodeOutputRecords output4 = output.GetGroupNodeOutputRecords(8); + output4[0].a = 11; +} + +// CHECK: OpFunction +// CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U2]] [[U8]] [[U0]] +// CHECK: [[VAL:%[^ ]*]] = OpLoad %{{[^ ]*}} [[PAY]] +// CHECK: OpStore [[OUT:%[^ ]*]] [[VAL]] +// CHECK: [[PTR:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUT]] [[U0]] [[S0]] +// CHECK: OpStore [[PTR]] [[U11]] +// CHECK: OpFunctionEnd + diff --git a/tools/clang/test/CodeGenSPIRV/node.member.write.matrix.hlsl b/tools/clang/test/CodeGenSPIRV/node.member.write.matrix.hlsl new file mode 100644 index 0000000000..d875f27d4e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.member.write.matrix.hlsl @@ -0,0 +1,123 @@ +// RUN: %dxc -spirv -Vd -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// Note: validation disabled until NodePayloadAMDX pointers are allowed +// as function arguments +// ================================================================== +// Test writing to matrix members of node records +// ================================================================== + +// CHECK: OpName [[NODE01:%[^ ]*]] "node01" +// CHECK: OpName [[INPUT1:%[^ ]*]] "input1" +// CHECK: OpName [[NODE02:%[^ ]*]] "node02" +// CHECK: OpName [[INPUT2:%[^ ]*]] "input2" +// CHECK: OpName [[NODE03:%[^ ]*]] "node03" +// CHECK: OpName [[OUTPUT3:%[^ ]*]] "output3" +// CHECK: OpName [[NODE04:%[^ ]*]] "node04" +// CHECK: OpName [[OUTPUTS4:%[^ ]*]] "outputs4" + +struct RECORD +{ + row_major float2x2 m0; + row_major float2x2 m1; + column_major float2x2 m2; +}; + +// CHECK-DAG: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U64:%[^ ]*]] = OpConstant [[UINT]] 64 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[FLOAT:%[^ ]*]] = OpTypeFloat 32 +// CHECK-DAG: [[F111:%[^ ]*]] = OpConstant [[FLOAT]] 111 +// CHECK-DAG: [[V2FLOAT:%[^ ]*]] = OpTypeVector [[FLOAT]] 2 +// CHECK-DAG: [[C1:%[^ ]*]] = OpConstantComposite [[V2FLOAT]] [[F111]] [[F111]] +// CHECK-DAG: [[MAT2V2FLOAT:[^ ]*]] = OpTypeMatrix [[V2FLOAT]] 2 +// CHECK-DAG: [[M1:%[^ ]*]] = OpConstantComposite [[MAT2V2FLOAT]] [[C1]] [[C1]] +// CHECK-DAG: [[INT:%[^ ]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[I1:%[^ ]*]] = OpConstant [[INT]] 1 +// CHECK-DAG: [[I0:%[^ ]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[I2:%[^ ]*]] = OpConstant [[INT]] 2 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[F222:%[^ ]*]] = OpConstant [[FLOAT]] 222 +// CHECK-DAG: [[C2:%[^ ]*]] = OpConstantComposite [[V2FLOAT]] [[F222]] [[F222]] +// CHECK-DAG: [[M2:%[^ ]*]] = OpConstantComposite [[MAT2V2FLOAT]] [[C2]] [[C2]] +// CHECK-DAG: [[U4:%[^ ]*]] = OpConstant [[UINT]] 4 +// CHECK-DAG: [[U2:%[^ ]*]] = 
OpConstant [[UINT]] 2 + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(64,1,1)] +void node01(RWDispatchNodeInputRecord input1) +{ + // CHECK: [[NODE01]] = OpFunction + // CHECK: [[P0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[INPUT1]] [[U0]] + // CHECK: [[P1:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[P0]] [[I1]] + // CHECK: OpStore [[P1]] [[M1]] + // CHECK: [[P0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[INPUT1]] [[U0]] + // CHECK: [[P2:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[P0]] [[I0]] + // CHECK: [[VAL:%[^ ]*]] = OpLoad [[MAT2V2FLOAT]] [[P2]] + // CHECK: [[P0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[INPUT1]] [[U0]] + // CHECK: [[P3:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[P0]] [[I2]] + // CHECK: OpStore [[P3]] [[VAL]] + // CHECK: OpFunctionEnd + input1.Get().m1 = 111; + input1.Get().m2 = input1.Get().m0; +} + +[Shader("node")] +[NumThreads(1,1,1)] +[NodeLaunch("coalescing")] +void node02([MaxRecords(4)] RWGroupNodeInputRecords input2) +{ + // CHECK: [[NODE02]] = OpFunction + // CHECK: [[P1:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[INPUT2]] [[U0]] [[I1]] + // CHECK: OpStore [[P1]] [[M1]] + // CHECK: [[P2:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[INPUT2]] [[U1]] [[I0]] + // CHECK: [[VAL:%[^ ]*]] = OpLoad [[MAT2V2FLOAT]] [[P2]] + // CHECK: [[P3:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[INPUT2]] [[U1]] [[I2]] + // CHECK: OpStore [[P3]] [[VAL]] + // CHECK: OpFunctionEnd + input2[0].m1 = 111; + input2[1].m2 = input2[1].m0; +} + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeDispatchGrid(64,1,1)] +[NodeLaunch("broadcasting")] +void node03(NodeOutput output3) +{ + // CHECK: [[NODE03]] = OpFunction + // CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U4]] [[U1]] [[U0]] + // CHECK: [[VAL:%[^ ]*]] = OpLoad %{{[^ ]*}} [[PAY]] + // CHECK: OpStore [[OUTREC3:%[^ ]*]] [[VAL]] + // CHECK: [[P0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUTREC3]] [[U0]] + // CHECK: [[P1:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[P0]] [[I1]] + // CHECK: OpStore [[P1]] [[M1]] + // CHECK: [[P0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUTREC3]] [[U0]] + // CHECK: [[P2:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[P0]] [[I2]] + // CHECK: OpStore [[P2]] [[M2]] + // CHECK: OpFunctionEnd + ThreadNodeOutputRecords outrec = output3.GetThreadNodeOutputRecords(1); + outrec.Get().m1 = 111; + outrec.Get().m2 = 222; +} + +[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("coalescing")] +void node04([MaxRecords(5)] NodeOutput outputs4) +{ + // CHECK: [[NODE04]] = OpFunction + // CHECK: [[PAY:%[^ ]*]] = OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U2]] [[U1]] [[U0]] + // CHECK: [[VAL:%[^ ]*]] = OpLoad %{{[^ ]*}} [[PAY]] + // CHECK: OpStore [[OUTREC4:%[^ ]*]] [[VAL]] + // CHECK: [[P0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUTREC4]] [[U0]] + // CHECK: [[P1:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[P0]] [[I1]] + // CHECK: OpStore [[P1]] [[M1]] + // CHECK: [[P0:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[OUTREC4]] [[U0]] + // CHECK: [[P2:%[^ ]*]] = OpAccessChain %{{[^ ]*}} [[P0]] [[I2]] + // CHECK: OpStore [[P2]] [[M2]] + // CHECK: OpFunctionEnd + GroupNodeOutputRecords outrec = outputs4.GetGroupNodeOutputRecords(1); + outrec.Get().m1 = 111; + outrec.Get().m2 = 222; +} diff --git a/tools/clang/test/CodeGenSPIRV/node.member.write.types.hlsl b/tools/clang/test/CodeGenSPIRV/node.member.write.types.hlsl new file mode 100644 index 0000000000..ec95c3d758 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.member.write.types.hlsl @@ -0,0 +1,150 @@ +// RUN: %dxc -spirv -T lib_6_8 -fspv-target-env=vulkan1.3 -enable-16bit-types %s | FileCheck %s + +// 
Writes to node record members of various types + + +struct RECORD +{ + half h; + float f; + double d; + bool b; + int16_t i16; + uint16_t u16; + int i; + int64_t i64; + uint64_t u64; + float3 f3; + int ia[7]; +}; + +// CHECK: OpName [[RECORD:%[^ ]*]] "RECORD" +// CHECK: OpMemberName [[RECORD]] 0 "h" +// CHECK: OpMemberName [[RECORD]] 1 "f" +// CHECK: OpMemberName [[RECORD]] 2 "d" +// CHECK: OpMemberName [[RECORD]] 3 "b" +// CHECK: OpMemberName [[RECORD]] 4 "i16" +// CHECK: OpMemberName [[RECORD]] 5 "u16" +// CHECK: OpMemberName [[RECORD]] 6 "i" +// CHECK: OpMemberName [[RECORD]] 7 "i64" +// CHECK: OpMemberName [[RECORD]] 8 "u64" +// CHECK: OpMemberName [[RECORD]] 9 "f3" +// CHECK: OpMemberName [[RECORD]] 10 "ia" + +// CHECK-DAG: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[HALF:%[^ ]*]] = OpTypeFloat 16 +// CHECK-DAG: [[INT:%[^ ]*]] = OpTypeInt 32 1 +// CHECK-DAG: [[FLOAT:%[^ ]*]] = OpTypeFloat 32 +// CHECK-DAG: [[DOUBLE:%[^ ]*]] = OpTypeFloat 64 +// CHECK-DAG: [[SHORT:%[^ ]*]] = OpTypeInt 16 1 +// CHECK-DAG: [[USHORT:%[^ ]*]] = OpTypeInt 16 0 +// CHECK-DAG: [[LONG:%[^ ]*]] = OpTypeInt 64 1 +// CHECK-DAG: [[ULONG:%[^ ]*]] = OpTypeInt 64 0 +// CHECK-DAG: [[V3FLOAT:%[^ ]*]] = OpTypeVector [[FLOAT]] 3 + +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[HALF_0X1_8P_1:%[^ ]*]] = OpConstant [[HALF]] 0x1.8p+1 +// CHECK-DAG: [[I0:%[^ ]*]] = OpConstant [[INT]] 0 +// CHECK-DAG: [[FN5:%[^ ]*]] = OpConstant [[FLOAT]] -5 +// CHECK-DAG: [[I1:%[^ ]*]] = OpConstant [[INT]] 1 +// CHECK-DAG: [[D7:%[^ ]*]] = OpConstant [[DOUBLE]] 7 +// CHECK-DAG: [[I2:%[^ ]*]] = OpConstant [[INT]] 2 +// CHECK-DAG: [[I3:%[^ ]*]] = OpConstant [[INT]] 3 +// CHECK-DAG: [[S11:%[^ ]*]] = OpConstant [[SHORT]] 11 +// CHECK-DAG: [[I4:%[^ ]*]] = OpConstant [[INT]] 4 +// CHECK-DAG: [[US13:%[^ ]*]] = OpConstant [[USHORT]] 13 +// CHECK-DAG: [[I5:%[^ ]*]] = OpConstant [[INT]] 5 +// CHECK-DAG: [[I17:%[^ ]*]] = OpConstant [[INT]] 17 +// CHECK-DAG: [[I6:%[^ ]*]] = OpConstant [[INT]] 6 +// CHECK-DAG: [[LN19:%[^ ]*]] = OpConstant [[LONG]] -19 +// CHECK-DAG: [[I7:%[^ ]*]] = OpConstant [[INT]] 7 +// CHECK-DAG: [[UL21:%[^ ]*]] = OpConstant [[ULONG]] 21 +// CHECK-DAG: [[I8:%[^ ]*]] = OpConstant [[INT]] 8 +// CHECK-DAG: [[F23:%[^ ]*]] = OpConstant [[FLOAT]] 23 +// CHECK-DAG: [[I9:%[^ ]*]] = OpConstant [[INT]] 9 +// CHECK-DAG: [[I29:%[^ ]*]] = OpConstant [[INT]] 29 +// CHECK-DAG: [[I10:%[^ ]*]] = OpConstant [[INT]] 10 +// CHECK-DAG: [[U7:%[^ ]*]] = OpConstant [[UINT]] 7 + +// CHECK-DAG: [[AI7:%[^ ]*]] = OpTypeArray [[INT]] [[U7]] +// CHECK-DAG: [[RECORD]] = OpTypeStruct [[HALF]] [[FLOAT]] [[DOUBLE]] [[UINT]] [[SHORT]] [[USHORT]] [[INT]] [[LONG]] [[ULONG]] [[V3FLOAT]] [[AI7]] +// CHECK-DAG: [[RAR:%[^ ]*]] = OpTypeNodePayloadArrayAMDX %RECORD +// CHECK-DAG: [[RARP:%[^ ]*]] = OpTypePointer NodePayloadAMDX [[RAR]] +// CHECK-DAG: [[U2:%[^ ]*]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[HALFP:%[^ ]*]] = OpTypePointer Function [[HALF]] +// CHECK-DAG: [[FLOATP:%[^ ]*]] = OpTypePointer Function [[FLOAT]] +// CHECK-DAG: [[DOUBLEP:%[^ ]*]] = OpTypePointer Function [[DOUBLE]] +// CHECK-DAG: [[UINTP:%[^ ]*]] = OpTypePointer Function [[UINT]] +// CHECK-DAG: [[SHORTP:%[^ ]*]] = OpTypePointer Function [[SHORT]] +// CHECK-DAG: [[USHORTP:%[^ ]*]] = OpTypePointer Function [[USHORT]] +// CHECK-DAG: [[INTP:%[^ ]*]] = OpTypePointer Function [[INT]] +// CHECK-DAG: [[LONGP:%[^ ]*]] = OpTypePointer Function [[LONG]] +// CHECK-DAG: [[ULONGP:%[^ ]*]] = OpTypePointer Function [[ULONG]] + 
+[Shader("node")] +[NumThreads(1024,1,1)] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(512,1,1)] +void node125(NodeOutput output) +{ + GroupNodeOutputRecords output01 = output.GetGroupNodeOutputRecords(1); + // CHECK: OpAllocateNodePayloadsAMDX [[RARP]] [[U2]] [[U1]] [[U0]] + + output01.Get().h = 3.0; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[HALFP]] + // CHECK-SAME: [[I0]] + // CHECK: OpStore [[PTR]] [[HALF_0X1_8P_1]] + + output01.Get().f = -5.0; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[FLOATP]] + // CHECK-SAME: [[I1]] + // CHECK: OpStore [[PTR]] [[FN5]] + + output01.Get().d = 7.0; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[DOUBLEP]] + // CHECK-SAME: [[I2]] + // CHECK: OpStore [[PTR]] [[D7]] + + output01.Get().b = true; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[UINTP]] + // CHECK-SAME: [[I3]] + // CHECK: OpStore [[PTR]] [[U1]] + + output01.Get().i16 = 11; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[SHORTP]] + // CHECK-SAME: [[I4]] + // CHECK: OpStore [[PTR]] [[S11]] + + output01.Get().u16 = 13; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[USHORTP]] + // CHECK-SAME: [[I5]] + // CHECK: OpStore [[PTR]] [[US13]] + + output01.Get().i = 17; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[INTP]] + // CHECK-SAME: [[I6]] + // CHECK: OpStore [[PTR]] [[I17]] + + output01.Get().i64 = -19; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[LONGP]] + // CHECK-SAME: [[I7]] + // CHECK: OpStore [[PTR]] [[LN19]] + + output01.Get().u64 = 21; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[ULONGP]] + // CHECK-SAME: [[I8]] + // CHECK: OpStore [[PTR]] [[UL21]] + + output01.Get().f3.y = 23; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[FLOATP]] + // CHECK-SAME: [[I9]] + // CHECK-SAME: [[I1]] + // CHECK: OpStore [[PTR]] [[F23]] + + output01.Get().ia[5] = 29; + // CHECK: [[PTR:%[^ ]*]] = OpAccessChain [[INTP]] + // CHECK-SAME: [[I10]] + // CHECK-SAME: [[I5]] + // CHECK: OpStore [[PTR]] [[I29]] +} diff --git a/tools/clang/test/CodeGenSPIRV/node.mesh.hlsl b/tools/clang/test/CodeGenSPIRV/node.mesh.hlsl new file mode 100644 index 0000000000..4d1726abb2 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.mesh.hlsl @@ -0,0 +1,88 @@ +// RUN: %dxc -spirv -T lib_6_9 -fspv-target-env=vulkan1.3 %s | FileCheck %s +// XFAIL: * +// disabled until mesh nodes are implemented + +// Test loading of node input and funneling into mesh outputs +// Essentially an end-to-end mesh node test. 
+ + +RWBuffer buf0; + +#define MAX_VERT 32 +#define MAX_PRIM 16 + +struct MeshPerVertex { + float4 position : SV_Position; + float color[4] : COLOR; +}; + +struct MeshPerPrimitive { + float normal : NORMAL; + float malnor : MALNOR; + float alnorm : ALNORM; + float ormaln : ORMALN; + int layer[6] : LAYER; +}; + +struct MeshPayload { + float normal; + float malnor; + float alnorm; + float ormaln; + int layer[6]; +}; + +groupshared float gsMem[MAX_PRIM]; + +[Shader("node")] +[NodeLaunch("mesh")] +[outputtopology("triangle")] +[numthreads(128, 1, 1)] +[NodeDispatchGrid(64,1,1)] +void node_setmeshoutputcounts(DispatchNodeInputRecord mpl, + out indices uint3 primIndices[MAX_PRIM], + out vertices MeshPerVertex verts[MAX_VERT], + out primitives MeshPerPrimitive prims[MAX_PRIM], + in uint tig : SV_GroupIndex) { + SetMeshOutputCounts(32, 16); + + // create mpl + + MeshPerVertex ov; + ov.position = float4(14.0,15.0,16.0,17.0); + ov.color[0] = 14.0; + ov.color[1] = 15.0; + ov.color[2] = 16.0; + ov.color[3] = 17.0; + + if (tig % 3) { + primIndices[tig / 3] = uint3(tig, tig + 1, tig + 2); + + MeshPerPrimitive op; + op.normal = mpl.Get().normal; + op.malnor = gsMem[tig / 3 + 1]; + op.alnorm = mpl.Get().alnorm; + op.ormaln = mpl.Get().ormaln; + op.layer[0] = mpl.Get().layer[0]; + op.layer[1] = mpl.Get().layer[1]; + op.layer[2] = mpl.Get().layer[2]; + op.layer[3] = mpl.Get().layer[3]; + op.layer[4] = mpl.Get().layer[4]; + op.layer[5] = mpl.Get().layer[5]; + + gsMem[tig / 3] = op.normal; + prims[tig / 3] = op; + } + verts[tig] = ov; +} + +// CHECK: OpEntryPoint MeshExt [[ENTRY:%[^ ]*]] +// CHECK-DAG: OpExecutionMode [[ENTRY]] OutputVertices 32 +// CHECK-DAG: OpExecutionMode [[ENTRY]] OutputPrimitivesNV 16 +// CHECK-DAG: OpExecutionMode [[ENTRY]] OutputTrianglesNV +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U16:%[^ ]*]] = OpConstant [[UINT]] 16 +// CHECK-DAG: [[U32:%[^ ]*]] = OpConstant [[UINT]] 32 +// CHECK: [[ENTRY]] = OpFunction +// CHECK: OpSetMeshOutputsEXT [[U32]] [[U16]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/node.output-complete.hlsl b/tools/clang/test/CodeGenSPIRV/node.output-complete.hlsl new file mode 100644 index 0000000000..17db15e7db --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.output-complete.hlsl @@ -0,0 +1,33 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// OutputComplete() is called with NodeOutput + +struct OUTPUT_RECORD +{ + uint value; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1024,1,1)] +void outputcomplete([MaxRecords(256)] NodeOutput output) +{ + ThreadNodeOutputRecords outputrecords = output.GetThreadNodeOutputRecords(1); + // ... 
+ outputrecords.OutputComplete(); +} + +// CHECK: OpName [[RECORDS:%[^ ]*]] "outputrecords" +// CHECK: OpDecorateId [[ARR:%[^ ]*]] PayloadNodeNameAMDX [[STR:%[0-9A-Za-z_]*]] +// CHECK-DAG: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U1:%[^ ]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[REC:%[^ ]*]] = OpTypeStruct [[UINT]] +// CHECK-DAG: [[ARR:%[^ ]*]] = OpTypeNodePayloadArrayAMDX [[REC]] +// CHECK-DAG: [[PTR:%[^ ]*]] = OpTypePointer NodePayloadAMDX [[ARR]] +// CHECK-DAG: [[U4:[^ ]*]] = OpConstant [[UINT]] 4 +// CHECK: [[V0:%[^ ]*]] = OpAllocateNodePayloadsAMDX [[PTR]] [[U4]] [[U1]] [[U0]] +// CHECK: [[V1:%[^ ]*]] = OpLoad [[ARR]] [[V0]] +// CHECK: OpStore [[RECORDS]] [[V1]] +// CHECK: OpEnqueueNodePayloadsAMDX [[RECORDS]] diff --git a/tools/clang/test/CodeGenSPIRV/node.output.is-valid.empty.hlsl b/tools/clang/test/CodeGenSPIRV/node.output.is-valid.empty.hlsl new file mode 100644 index 0000000000..08a103cf5e --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.output.is-valid.empty.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -spirv -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// NodeOutputIsValid() is called with EmptyNodeOutput + +RWBuffer buf0; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1,1,1)] +void node131_nodeoutputisvalid_emptynodeoutput(EmptyNodeOutput output) +{ + buf0[0] = output.IsValid(); +} + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK: [[BOOL:%[^ ]*]] = OpTypeBool +// CHECK: OpIsNodePayloadValidAMDX [[BOOL]] %{{[^ ]*}} [[U0]] diff --git a/tools/clang/test/CodeGenSPIRV/node.output.is-valid.hlsl b/tools/clang/test/CodeGenSPIRV/node.output.is-valid.hlsl new file mode 100644 index 0000000000..40e3a74fcb --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.output.is-valid.hlsl @@ -0,0 +1,24 @@ +// RUN: %dxc -spirv -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// IsValid() is invoked on NodeOutput + +RWBuffer buf0; + +struct RECORD +{ + uint value; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(256,1,1)] +[NumThreads(1,1,1)] +void node129_nodeoutputisvalid_nodeoutput(NodeOutput output) +{ + buf0[0] = output.IsValid(); +} + +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK: [[BOOL:%[^ ]*]] = OpTypeBool +// CHECK: OpIsNodePayloadValidAMDX [[BOOL]] %{{[^ ]*}} [[U0]] diff --git a/tools/clang/test/CodeGenSPIRV/node.renamed.hlsl b/tools/clang/test/CodeGenSPIRV/node.renamed.hlsl new file mode 100644 index 0000000000..265fd6c17f --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.renamed.hlsl @@ -0,0 +1,28 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 external %s | FileCheck %s + +// Renamed node, unnamed index defaults to 0 + +struct RECORD { + uint i; +}; + +[Shader("node")] +[NodeLaunch("thread")] +[NodeID("new_node_name")] +[NodeIsProgramEntry] +void node017_renamed_node([NodeID("output_node_name", 2)] NodeOutput r) +{ + ThreadNodeOutputRecords records = r.GetThreadNodeOutputRecords(1); + records.OutputComplete(); +} + +// CHECK: OpEntryPoint GLCompute %{{[^ ]*}} "node017_renamed_node" +// CHECK-DAG: OpDecorateId [[TYPE:%[^ ]*]] PayloadNodeNameAMDX [[STR:%[0-9A-Za-z_]*]] +// CHECK-DAG: OpDecorateId [[TYPE]] PayloadNodeBaseIndexAMDX [[U2:%[0-9A-Za-z_]*]] +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[STR]] = OpConstantStringAMDX "output_node_name" +// CHECK-DAG: [[U0:%[_0-9A-Za-z]*]] = 
OpConstant [[UINT]] 0 +// CHECK-DAG: [[U1:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 1 +// CHECK-DAG: [[U2]] = OpConstant [[UINT]] 2 +// CHECK-DAG: [[U4:%[_0-9A-Za-z]*]] = OpConstant [[UINT]] 4 +// CHECK: OpAllocateNodePayloadsAMDX %{{[^ ]*}} [[U4]] [[U1]] [[U0]] diff --git a/tools/clang/test/CodeGenSPIRV/node.share-input.hlsl b/tools/clang/test/CodeGenSPIRV/node.share-input.hlsl new file mode 100644 index 0000000000..c439bef017 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.share-input.hlsl @@ -0,0 +1,42 @@ +// RUN: %dxc -spirv -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Check that the NodeShareInputOf metadata entry is populated correctly + +struct entryRecord +{ + int data0; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(1, 1, 1)] +void firstNode(DispatchNodeInputRecord inputData) +{ } + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(1, 1, 1)] +[NodeShareInputOf("firstNode")] +void secondNode(DispatchNodeInputRecord inputData) +{ } + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(2, 1, 1)] +[NumThreads(1, 1, 1)] +[NodeShareInputOf("firstNode", 3)] +void thirdNode(DispatchNodeInputRecord inputData) +{ } + + +// CHECK: OpEntryPoint GLCompute %firstNode "firstNode" +// CHECK: OpEntryPoint GLCompute %secondNode "secondNode" +// CHECK: OpEntryPoint GLCompute %thirdNode "thirdNode" +// CHECK-NOT: OpExecutionModeId %firstNode SharesInputWithAMDX +// CHECK: OpExecutionModeId %secondNode SharesInputWithAMDX [[STR:%[^ ]*]] [[U0:%[^ ]*]] +// CHECK: OpExecutionModeId %thirdNode SharesInputWithAMDX [[STR]] [[U3:%[^ ]*]] +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK-DAG: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK-DAG: [[U3:%[^ ]*]] = OpConstant [[UINT]] 3 diff --git a/tools/clang/test/CodeGenSPIRV/node.sparse-nodes.hlsl b/tools/clang/test/CodeGenSPIRV/node.sparse-nodes.hlsl new file mode 100644 index 0000000000..ca3c14b8da --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.sparse-nodes.hlsl @@ -0,0 +1,141 @@ +// RUN: %dxc -spirv -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +struct RECORD1 +{ + uint value; + uint value2; +}; + +// CHECK: OpEntryPoint GLCompute [[NODE10:%[^ ]*]] "node_1_0" +// CHECK: OpEntryPoint GLCompute [[NODE11:%[^ ]*]] "node_1_1" +// CHECK: OpEntryPoint GLCompute [[NODE12:%[^ ]*]] "node_1_2" +// CHECK: OpEntryPoint GLCompute [[NODE20:%[^ ]*]] "node_2_0" +// CHECK: OpEntryPoint GLCompute [[NODE21:%[^ ]*]] "node_2_1" +// CHECK: OpEntryPoint GLCompute [[NODE22:%[^ ]*]] "node_2_2" +// CHECK: OpDecorateId [[A10:%[^ ]*]] PayloadNodeNameAMDX [[S10:%[^ ]*]] +// CHECK: OpDecorateId [[A10]] NodeMaxPayloadsAMDX [[U31:%[^ ]*]] +// CHECK: OpDecorate [[A10]] PayloadNodeSparseArrayAMDX +// CHECK: OpDecorateId [[A10]] PayloadNodeArraySizeAMDX [[U129:%[^ ]*]] +// CHECK: OpDecorateId [[A11:%[^ ]*]] PayloadNodeNameAMDX [[S11:%[^ ]*]] +// CHECK: OpDecorateId [[A11]] NodeMaxPayloadsAMDX [[U37:%[^ ]*]] +// CHECK: OpDecorate [[A11]] PayloadNodeSparseArrayAMDX +// CHECK: OpDecorateId [[A12:%[^ ]*]] PayloadNodeNameAMDX [[S12:%[^ ]*]] +// CHECK: OpDecorateId [[A12]] NodeMaxPayloadsAMDX [[U47:%[^ ]*]] +// CHECK: OpDecorate [[A12]] PayloadNodeSparseArrayAMDX +// CHECK: OpDecorateId [[A20:%[^ ]*]] PayloadNodeNameAMDX [[S20:%[^ ]*]] +// CHECK: OpDecorateId [[A20]] NodeMaxPayloadsAMDX [[U41:%[^ ]*]] +// CHECK: OpDecorate [[A20]] PayloadNodeSparseArrayAMDX +// CHECK: OpDecorateId [[A20]] PayloadNodeArraySizeAMDX [[U131:%[^ ]*]] +// CHECK: OpDecorateId 
[[A21:%[^ ]*]] PayloadNodeNameAMDX [[S21:%[^ ]*]] +// CHECK: OpDecorateId [[A21]] NodeMaxPayloadsAMDX [[U43:%[^ ]*]] +// CHECK: OpDecorate [[A21]] PayloadNodeSparseArrayAMDX +// CHECK: OpDecorateId [[A22:%[^ ]*]] PayloadNodeNameAMDX [[S22:%[^ ]*]] +// CHECK: OpDecorateId [[A22]] NodeMaxPayloadsAMDX [[U53:%[^ ]*]] +// CHECK: OpDecorate [[A22]] PayloadNodeSparseArrayAMDX +// CHECK: [[UINT:%[^ ]*]] = OpTypeInt 32 0 +// CHECK: [[U0:%[^ ]*]] = OpConstant [[UINT]] 0 +// CHECK: [[RECORD:%[^ ]*]] = OpTypeStruct [[UINT]] [[UINT]] +// CHECK-DAG: [[A10]] = OpTypeNodePayloadArrayAMDX [[RECORD]] +// CHECK-DAG: [[S10]] = OpConstantStringAMDX "OutputArray_1_0" +// CHECK-DAG: [[U31]] = OpConstant [[UINT]] 31 +// CHECK-DAG: [[U129]] = OpConstant [[UINT]] 129 +// CHECK-DAG: [[A11]] = OpTypeNodePayloadArrayAMDX [[RECORD]] +// CHECK-DAG: [[S11]] = OpConstantStringAMDX "OutputArray_1_1" +// CHECK-DAG: [[U37]] = OpConstant [[UINT]] 37 +// CHECK-DAG: [[A12]] = OpTypeNodePayloadArrayAMDX [[RECORD]] +// CHECK-DAG: [[S12]] = OpConstantStringAMDX "Output_1_2" +// CHECK-DAG: [[U47]] = OpConstant [[UINT]] 47 +// CHECK-DAG: [[EMPTY:%[^ ]*]] = OpTypeStruct +// CHECK-DAG: [[A20]] = OpTypeNodePayloadArrayAMDX [[EMPTY]] +// CHECK-DAG: [[S20]] = OpConstantStringAMDX "OutputArray_2_0" +// CHECK-DAG: [[U41]] = OpConstant [[UINT]] 41 +// CHECK-DAG: [[U131]] = OpConstant [[UINT]] 131 +// CHECK-DAG: [[A21]] = OpTypeNodePayloadArrayAMDX [[EMPTY]] +// CHECK-DAG: [[S21]] = OpConstantStringAMDX "OutputArray_2_1" +// CHECK-DAG: [[U43]] = OpConstant [[UINT]] 43 +// CHECK-DAG: [[A22]] = OpTypeNodePayloadArrayAMDX [[EMPTY]] +// CHECK-DAG: [[S22]] = OpConstantStringAMDX "Output_2_2" +// CHECK-DAG: [[U53]] = OpConstant [[UINT]] 53 + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void node_1_0( + [AllowSparseNodes] [NodeArraySize(129)] [MaxRecords(31)] + NodeOutputArray OutputArray_1_0) { + ThreadNodeOutputRecords outRec = OutputArray_1_0[1].GetThreadNodeOutputRecords(2); + outRec.OutputComplete(); +} + +// CHECK: [[NODE10]] = OpFunction %void None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void node_1_1( + [UnboundedSparseNodes] [MaxRecords(37)] + NodeOutputArray OutputArray_1_1) { + ThreadNodeOutputRecords outRec = OutputArray_1_1[1].GetThreadNodeOutputRecords(2); + outRec.OutputComplete(); +} + +// CHECK: [[NODE11]] = OpFunction %void None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void node_1_2( + [AllowSparseNodes] [MaxRecords(47)] + NodeOutput Output_1_2) { + ThreadNodeOutputRecords outRec = Output_1_2.GetThreadNodeOutputRecords(2); + outRec.OutputComplete(); +} + +// CHECK: [[NODE12]] = OpFunction %void None +// CHECK: %{{[^ ]*}} = OpAllocateNodePayloadsAMDX %{{[^ ]*}} %{{[^ ]*}} %{{[^ ]*}} [[U0]] +// CHECK: OpFunctionEnd + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void node_2_0( + [AllowSparseNodes] [NodeArraySize(131)] [MaxRecords(41)] + EmptyNodeOutputArray OutputArray_2_0) { + OutputArray_2_0[1].GroupIncrementOutputCount(10); +} + +// CHECK: [[NODE20]] = OpFunction %void None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void node_2_1( + [UnboundedSparseNodes] [MaxRecords(43)] + EmptyNodeOutputArray OutputArray_2_1) { + OutputArray_2_1[1].GroupIncrementOutputCount(10); 
+} + +// CHECK: [[NODE21]] = OpFunction %void None +// CHECK: OpFunctionEnd + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeDispatchGrid(1, 1, 1)] +[NumThreads(1, 1, 1)] +void node_2_2( + [AllowSparseNodes] [MaxRecords(53)] + EmptyNodeOutput Output_2_2) { + Output_2_2.GroupIncrementOutputCount(10); +} + +// CHECK: [[NODE22]] = OpFunction %void None +// CHECK: %{{[^ ]*}} = OpAllocateNodePayloadsAMDX %{{[^ ]*}} %{{[^ ]*}} %{{[^ ]*}} [[U0]] +// CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/node.thread.num-threads.hlsl b/tools/clang/test/CodeGenSPIRV/node.thread.num-threads.hlsl new file mode 100644 index 0000000000..8732cf3478 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.thread.num-threads.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// NumThreads + +[Shader("node")] +[NodeLaunch("thread")] +[NumThreads(1,1,1)] +[NodeIsProgramEntry] +void node010_thread_numthreads_shader() +{ +} + +// CHECK: OpEntryPoint GLCompute [[SHADER:%[0-9A-Za-z_]*]] +// CHECK: OpExecutionMode [[SHADER]] LocalSize 1 1 1 +// CHECK: OpReturn diff --git a/tools/clang/test/CodeGenSPIRV/node.thread.num-threads.none.hlsl b/tools/clang/test/CodeGenSPIRV/node.thread.num-threads.none.hlsl new file mode 100644 index 0000000000..0b230479c4 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/node.thread.num-threads.none.hlsl @@ -0,0 +1,15 @@ +// RUN: %dxc -spirv -Od -T lib_6_8 -fspv-target-env=vulkan1.3 %s | FileCheck %s + +// Thread launch node without NumThreads specified should use a +// default of (1,1,1) + +[Shader("node")] +[NodeLaunch("thread")] +[NodeIsProgramEntry] +void node011_thread_numthreads_none() +{ +} + +// CHECK: OpEntryPoint GLCompute [[SHADER:%[0-9A-Za-z_]*]] +// CHECK: OpExecutionMode [[SHADER]] LocalSize 1 1 1 +// CHECK: OpReturn diff --git a/tools/clang/test/CodeGenSPIRV/op.vector.swizzle.buffer-store.hlsl b/tools/clang/test/CodeGenSPIRV/op.vector.swizzle.buffer-store.hlsl new file mode 100644 index 0000000000..5d77d222f9 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/op.vector.swizzle.buffer-store.hlsl @@ -0,0 +1,26 @@ +// RUN: %dxc -T cs_6_0 -E main -fcgl %s -spirv | FileCheck %s + +RWStructuredBuffer buffer; + +// CHECK-DAG: [[v4_0:%[0-9]+]] = OpConstantComposite %v4uint %uint_0 %uint_0 %uint_0 %uint_0 +// CHECK-DAG: [[v4_1:%[0-9]+]] = OpConstantComposite %v4uint %uint_1 %uint_1 %uint_1 %uint_1 + +[numthreads(1, 1, 1)] +void main() +{ +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Uniform_v4uint %buffer %int_0 %uint_0 +// CHECK: [[load:%[0-9]+]] = OpLoad %v4uint [[ptr]] +// CHECK: [[cast:%[0-9]+]] = OpINotEqual %v4bool [[load]] [[v4_0]] +// CHECK: [[shuf:%[0-9]+]] = OpVectorShuffle %v3bool [[cast]] [[cast]] 0 1 2 +// CHECK: OpStore %a [[shuf]] + bool3 a = buffer[0].xyz; + +// CHECK: [[a:%[0-9]+]] = OpLoad %v3bool %a +// CHECK: [[ptr:%[0-9]+]] = OpAccessChain %_ptr_Uniform_v4uint %buffer %int_0 %uint_1 +// CHECK: [[load:%[0-9]+]] = OpLoad %v4uint [[ptr]] +// CHECK: [[cast:%[0-9]+]] = OpINotEqual %v4bool [[load]] [[v4_0]] +// CHECK: [[shuf:%[0-9]+]] = OpVectorShuffle %v4bool [[cast]] [[a]] 4 5 6 3 +// CHECK: [[cast:%[0-9]+]] = OpSelect %v4uint [[shuf]] [[v4_1]] [[v4_0]] +// CHECK: OpStore [[ptr]] [[cast]] + buffer[1].xyz = a; +} diff --git a/tools/clang/test/CodeGenSPIRV/rich.debug.function.param.hlsl b/tools/clang/test/CodeGenSPIRV/rich.debug.function.param.hlsl index 9576837884..a3701a4ed4 100644 --- a/tools/clang/test/CodeGenSPIRV/rich.debug.function.param.hlsl +++ 
b/tools/clang/test/CodeGenSPIRV/rich.debug.function.param.hlsl @@ -9,7 +9,7 @@ // CHECK: [[x:%[0-9]+]] = OpString "x" // CHECK: [[srcMainName:%[0-9]+]] = OpString "main" // CHECK: [[color:%[0-9]+]] = OpString "color" -// CHECK: [[mainName:%[0-9]+]] = OpString "wrapper" +// CHECK: [[mainName:%[0-9]+]] = OpString "__dxc_setup" // CHECK: [[int:%[0-9]+]] = OpExtInst %void [[set]] DebugTypeBasic {{%[0-9]+}} %uint_32 Signed // CHECK: [[float:%[0-9]+]] = OpExtInst %void [[set]] DebugTypeBasic {{%[0-9]+}} %uint_32 Float diff --git a/tools/clang/test/CodeGenSPIRV/shader.debug.function.hlsl b/tools/clang/test/CodeGenSPIRV/shader.debug.function.hlsl index b263fd88ad..23bb479a46 100644 --- a/tools/clang/test/CodeGenSPIRV/shader.debug.function.hlsl +++ b/tools/clang/test/CodeGenSPIRV/shader.debug.function.hlsl @@ -6,7 +6,7 @@ // CHECK: [[fooName:%[0-9]+]] = OpString "foo" // CHECK: [[emptyStr:%[0-9]+]] = OpString "" // CHECK: [[srcMainName:%[0-9]+]] = OpString "main" -// CHECK: [[mainName:%[0-9]+]] = OpString "wrapper" +// CHECK: [[mainName:%[0-9]+]] = OpString "__dxc_setup" // CHECK: [[clOpts:%[0-9]+]] = OpString " -E main -T ps_6_0 -spirv -fcgl -fspv-debug=vulkan // CHECK: [[int:%[0-9]+]] = OpExtInst %void [[set]] DebugTypeBasic {{%[0-9]+}} %uint_32 %uint_4 %uint_0 diff --git a/tools/clang/test/CodeGenSPIRV/spv.intrinsicConstantValue.hlsl b/tools/clang/test/CodeGenSPIRV/spv.intrinsicConstantValue.hlsl new file mode 100644 index 0000000000..a592863f1b --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/spv.intrinsicConstantValue.hlsl @@ -0,0 +1,13 @@ +// RUN: %dxc -Od -T cs_6_8 -spirv -fcgl %s | FileCheck %s + +// CHECK: %spirvIntrinsicType = OpTypeInt 8 0 +using uint8_t [[vk::ext_capability(/* Int8 */ 39)]] = + vk::SpirvType >, + vk::Literal > >; + +[[vk::ext_instruction(/* OpConstant */ 43)]] uint8_t mkconsant([[vk::ext_literal]] int v); + +// CHECK: OpConstant %spirvIntrinsicType 42 +static const uint8_t K = mkconsant(42); + +[numthreads(1, 1, 1)] void main() {} diff --git a/tools/clang/test/CodeGenSPIRV/spv.intrinsicInTemplate.hlsl b/tools/clang/test/CodeGenSPIRV/spv.intrinsicInTemplate.hlsl new file mode 100644 index 0000000000..0ecda64dbb --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/spv.intrinsicInTemplate.hlsl @@ -0,0 +1,29 @@ +// RUN: %dxc -T cs_6_8 -HV 2021 -O0 -spirv -fspv-target-env=universal1.5 %s | FileCheck %s + +// CHECK: [[Int8Type:%.*]] = OpTypeInt 8 0 +using Int8Type = vk::SpirvType >, + vk::Literal > >; + +// CHECK: [[MatrixType:%.*]] = OpTypeCooperativeMatrixKHR [[Int8Type]] %uint_3 %uint_16 %uint_16 %uint_0 +using I8MatA = vk::SpirvOpaqueType< + /* OpTypeCooperativeMatrixKHR */ 4456, Int8Type, + vk::integral_constant, + vk::integral_constant, vk::integral_constant, + vk::integral_constant >; + +template +[[vk::ext_instruction(/* OpCooperativeMatrixLoadKHR */ 4457)]] ResultType +__builtin_spv_CooperativeMatrixLoadKHR([[vk::ext_reference]] PointerType pointer, + uint32_t memory_layout, uint32_t stride, [[vk::ext_literal]] uint32_t memory_operand); + +StructuredBuffer buffer : register(t0, space0); + +[numthreads(32, 1, 1)] void main() { + [[vk::ext_extension("SPV_KHR_cooperative_matrix")]] + [[vk::ext_capability(/* CooperativeMatrixKHRCapability */ 6022)]] + [[vk::ext_capability(/* VulkanMemoryModel */ 5345)]] + [[vk::ext_capability(/* Int8 */ 39)]] + // CHECK: OpCooperativeMatrixLoadKHR [[MatrixType]] %{{.*}} %uint_0 %uint_32 None + I8MatA matA = __builtin_spv_CooperativeMatrixLoadKHR(buffer[0], /* rowMajor */ 0, 32, 0); +} diff --git 
a/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl b/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl index 35d1b868a8..769fe808b2 100644 --- a/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.buffer.hlsl @@ -1,109 +1,144 @@ -// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER +// RUN: %dxc -fspv-use-unknown-image-format -T ps_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN // CHECK: OpCapability SampledBuffer -// CHECK: OpCapability StorageImageExtendedFormats +// INFER: OpCapability StorageImageExtendedFormats -// CHECK: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 1 R32i +// INFER: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 1 R32i +// UNKNOWN: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 1 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image Buffer<int> intbuf; -// CHECK: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 1 R32ui +// INFER: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 1 R32ui +// UNKNOWN: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 1 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_0 = OpTypePointer UniformConstant %type_buffer_image_0 Buffer<uint> uintbuf; -// CHECK: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 1 R32f +// INFER: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 1 R32f +// UNKNOWN: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 1 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_1 = OpTypePointer UniformConstant %type_buffer_image_1 Buffer<float> floatbuf; -// CHECK: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 R32i +// INFER: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 R32i +// UNKNOWN: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_2 = OpTypePointer UniformConstant %type_buffer_image_2 RWBuffer<int> intrwbuf; -// CHECK: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 R32ui +// INFER: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 R32ui +// UNKNOWN: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_3 = OpTypePointer UniformConstant %type_buffer_image_3 RWBuffer<uint> uintrwbuf; -// CHECK: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 R32f +// INFER: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 R32f +// UNKNOWN: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_4 = OpTypePointer UniformConstant %type_buffer_image_4 RWBuffer<float> floatrwbuf; -// CHECK: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 1 Rg32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 +// If the `Unknown` image format is used, then the images below will reuse the types above.
+// UNKNOWN-NOT: OpTypeImage + +// INFER: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 1 Rg32i +// INFER: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 Buffer int2buf; -// CHECK: %type_buffer_image_6 = OpTypeImage %uint Buffer 2 0 0 1 Rg32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 +// INFER: %type_buffer_image_6 = OpTypeImage %uint Buffer 2 0 0 1 Rg32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 Buffer uint2buf; -// CHECK: %type_buffer_image_7 = OpTypeImage %float Buffer 2 0 0 1 Rg32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 +// INFER: %type_buffer_image_7 = OpTypeImage %float Buffer 2 0 0 1 Rg32f +// INFER: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 Buffer float2buf; -// CHECK: %type_buffer_image_8 = OpTypeImage %int Buffer 2 0 0 2 Rg32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 +// INFER: %type_buffer_image_8 = OpTypeImage %int Buffer 2 0 0 2 Rg32i +// INFER: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 RWBuffer int2rwbuf; -// CHECK: %type_buffer_image_9 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant %type_buffer_image_9 +// INFER: %type_buffer_image_9 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant %type_buffer_image_9 RWBuffer uint2rwbuf; -// CHECK: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rg32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 +// INFER: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rg32f +// INFER: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 RWBuffer float2rwbuf; -// CHECK: %type_buffer_image_11 = OpTypeImage %int Buffer 2 0 0 1 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_11 = OpTypePointer UniformConstant %type_buffer_image_11 -// CHECK: %type_buffer_image_12 = OpTypeImage %int Buffer 2 0 0 1 Rgba32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_12 = OpTypePointer UniformConstant %type_buffer_image_12 +// INFER: %type_buffer_image_11 = OpTypeImage %int Buffer 2 0 0 1 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_11 = OpTypePointer UniformConstant %type_buffer_image_11 +// INFER: %type_buffer_image_12 = OpTypeImage %int Buffer 2 0 0 1 Rgba32i +// INFER: %_ptr_UniformConstant_type_buffer_image_12 = OpTypePointer UniformConstant %type_buffer_image_12 Buffer int3buf; Buffer int4buf; -// CHECK: %type_buffer_image_13 = OpTypeImage %uint Buffer 2 0 0 1 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_13 = OpTypePointer UniformConstant %type_buffer_image_13 -// CHECK: %type_buffer_image_14 = OpTypeImage %uint Buffer 2 0 0 1 Rgba32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_14 = OpTypePointer UniformConstant %type_buffer_image_14 +// INFER: %type_buffer_image_13 = OpTypeImage %uint Buffer 2 0 0 1 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_13 = OpTypePointer UniformConstant %type_buffer_image_13 +// INFER: %type_buffer_image_14 = OpTypeImage %uint Buffer 2 0 0 1 Rgba32ui +// INFER: 
%_ptr_UniformConstant_type_buffer_image_14 = OpTypePointer UniformConstant %type_buffer_image_14 Buffer uint3buf; Buffer uint4buf; -// CHECK: %type_buffer_image_15 = OpTypeImage %float Buffer 2 0 0 1 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_15 = OpTypePointer UniformConstant %type_buffer_image_15 -// CHECK: %type_buffer_image_16 = OpTypeImage %float Buffer 2 0 0 1 Rgba32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_16 = OpTypePointer UniformConstant %type_buffer_image_16 +// INFER: %type_buffer_image_15 = OpTypeImage %float Buffer 2 0 0 1 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_15 = OpTypePointer UniformConstant %type_buffer_image_15 +// INFER: %type_buffer_image_16 = OpTypeImage %float Buffer 2 0 0 1 Rgba32f +// INFER: %_ptr_UniformConstant_type_buffer_image_16 = OpTypePointer UniformConstant %type_buffer_image_16 Buffer float3buf; Buffer float4buf; -// CHECK: %type_buffer_image_17 = OpTypeImage %int Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_17 = OpTypePointer UniformConstant %type_buffer_image_17 -// CHECK: %type_buffer_image_18 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_18 = OpTypePointer UniformConstant %type_buffer_image_18 +// INFER: %type_buffer_image_17 = OpTypeImage %int Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_17 = OpTypePointer UniformConstant %type_buffer_image_17 +// INFER: %type_buffer_image_18 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i +// INFER: %_ptr_UniformConstant_type_buffer_image_18 = OpTypePointer UniformConstant %type_buffer_image_18 RWBuffer int3rwbuf; RWBuffer int4rwbuf; -// CHECK: %type_buffer_image_19 = OpTypeImage %uint Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_19 = OpTypePointer UniformConstant %type_buffer_image_19 -// CHECK: %type_buffer_image_20 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_20 = OpTypePointer UniformConstant %type_buffer_image_20 +// INFER: %type_buffer_image_19 = OpTypeImage %uint Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_19 = OpTypePointer UniformConstant %type_buffer_image_19 +// INFER: %type_buffer_image_20 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_20 = OpTypePointer UniformConstant %type_buffer_image_20 RWBuffer uint3rwbuf; RWBuffer uint4rwbuf; -// CHECK: %type_buffer_image_21 = OpTypeImage %float Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_21 = OpTypePointer UniformConstant %type_buffer_image_21 -// CHECK: %type_buffer_image_22 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_22 = OpTypePointer UniformConstant %type_buffer_image_22 +// INFER: %type_buffer_image_21 = OpTypeImage %float Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_21 = OpTypePointer UniformConstant %type_buffer_image_21 +// INFER: %type_buffer_image_22 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_buffer_image_22 = OpTypePointer UniformConstant %type_buffer_image_22 RWBuffer float3rwbuf; RWBuffer float4rwbuf; -// CHECK: %intbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// CHECK: %uintbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// CHECK: %floatbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// CHECK: %intrwbuf = OpVariable 
%_ptr_UniformConstant_type_buffer_image_2 UniformConstant -// CHECK: %uintrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant -// CHECK: %floatrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant -// CHECK: %int2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant -// CHECK: %uint2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant -// CHECK: %float2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant -// CHECK: %int2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant -// CHECK: %uint2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant -// CHECK: %float2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant -// CHECK: %int3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_11 UniformConstant -// CHECK: %int4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_12 UniformConstant -// CHECK: %uint3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_13 UniformConstant -// CHECK: %uint4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_14 UniformConstant -// CHECK: %float3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_15 UniformConstant -// CHECK: %float4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_16 UniformConstant -// CHECK: %int3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_17 UniformConstant -// CHECK: %int4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_18 UniformConstant -// CHECK: %uint3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_19 UniformConstant -// CHECK: %uint4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_20 UniformConstant -// CHECK: %float3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_21 UniformConstant -// CHECK: %float4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_22 UniformConstant +// INFER: %intbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// INFER: %uintbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// INFER: %floatbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// INFER: %intrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// INFER: %uintrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// INFER: %floatrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// INFER: %int2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant +// INFER: %uint2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant +// INFER: %float2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant +// INFER: %int2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant +// INFER: %uint2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant +// INFER: %float2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant +// INFER: %int3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_11 UniformConstant +// INFER: %int4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_12 UniformConstant +// INFER: %uint3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_13 UniformConstant +// INFER: %uint4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_14 UniformConstant +// INFER: %float3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_15 UniformConstant +// INFER: %float4buf = OpVariable 
%_ptr_UniformConstant_type_buffer_image_16 UniformConstant +// INFER: %int3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_17 UniformConstant +// INFER: %int4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_18 UniformConstant +// INFER: %uint3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_19 UniformConstant +// INFER: %uint4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_20 UniformConstant +// INFER: %float3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_21 UniformConstant +// INFER: %float4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_22 UniformConstant + +// UNKNOWN: %intbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uintbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %floatbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %intrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// UNKNOWN: %uintrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// UNKNOWN: %floatrwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// UNKNOWN: %int2buf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uint2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %float2buf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %int2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// UNKNOWN: %uint2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// UNKNOWN: %float2rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// UNKNOWN: %int3buf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %int4buf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uint3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %uint4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %float3buf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %float4buf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %int3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// UNKNOWN: %int4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// UNKNOWN: %uint3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// UNKNOWN: %uint4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// UNKNOWN: %float3rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// UNKNOWN: %float4rwbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant void main() {} diff --git a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl index c616f65bb9..cf84562e52 100644 --- a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-buffer.hlsl @@ -1,59 +1,80 @@ -// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER +// RUN: %dxc -fspv-use-unknown-image-format -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN + +// Before 
vulkan1.3, we should be trying to infer the image type, because +// we cannot necessarily use Unknown. However, in VK1.3 and later, we can use +// Unknown. // CHECK: OpCapability SampledBuffer -// CHECK: OpCapability StorageImageExtendedFormats +// INFER: OpCapability StorageImageExtendedFormats -// CHECK: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 2 R32i +// INFER: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 2 R32i +// UNKNOWN: %type_buffer_image = OpTypeImage %int Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image = OpTypePointer UniformConstant %type_buffer_image RasterizerOrderedBuffer<int> introvbuf; -// CHECK: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 2 R32ui +// INFER: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 2 R32ui +// UNKNOWN: %type_buffer_image_0 = OpTypeImage %uint Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_0 = OpTypePointer UniformConstant %type_buffer_image_0 RasterizerOrderedBuffer<uint> uintrovbuf; -// CHECK: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 2 R32f +// INFER: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 2 R32f +// UNKNOWN: %type_buffer_image_1 = OpTypeImage %float Buffer 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_buffer_image_1 = OpTypePointer UniformConstant %type_buffer_image_1 RasterizerOrderedBuffer<float> floatrovbuf; -// CHECK: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 Rg32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_2 = OpTypePointer UniformConstant %type_buffer_image_2 +// INFER: %type_buffer_image_2 = OpTypeImage %int Buffer 2 0 0 2 Rg32i +// INFER: %_ptr_UniformConstant_type_buffer_image_2 = OpTypePointer UniformConstant %type_buffer_image_2 RasterizerOrderedBuffer<int2> int2rovbuf; -// CHECK: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_3 = OpTypePointer UniformConstant %type_buffer_image_3 +// INFER: %type_buffer_image_3 = OpTypeImage %uint Buffer 2 0 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_3 = OpTypePointer UniformConstant %type_buffer_image_3 RasterizerOrderedBuffer<uint2> uint2rovbuf; -// CHECK: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 Rg32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_4 = OpTypePointer UniformConstant %type_buffer_image_4 +// INFER: %type_buffer_image_4 = OpTypeImage %float Buffer 2 0 0 2 Rg32f +// INFER: %_ptr_UniformConstant_type_buffer_image_4 = OpTypePointer UniformConstant %type_buffer_image_4 RasterizerOrderedBuffer<float2> float2rovbuf; -// CHECK: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 -// CHECK: %type_buffer_image_6 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i -// CHECK: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 +// INFER: %type_buffer_image_5 = OpTypeImage %int Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_5 = OpTypePointer UniformConstant %type_buffer_image_5 +// INFER: %type_buffer_image_6 = OpTypeImage %int Buffer 2 0 0 2 Rgba32i +// INFER: %_ptr_UniformConstant_type_buffer_image_6 = OpTypePointer UniformConstant %type_buffer_image_6 RasterizerOrderedBuffer<int3> int3rovbuf; RasterizerOrderedBuffer<int4> int4rovbuf; -// CHECK: %type_buffer_image_7 = OpTypeImage %uint Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 -// CHECK: 
%type_buffer_image_8 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui -// CHECK: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 +// INFER: %type_buffer_image_7 = OpTypeImage %uint Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_7 = OpTypePointer UniformConstant %type_buffer_image_7 +// INFER: %type_buffer_image_8 = OpTypeImage %uint Buffer 2 0 0 2 Rgba32ui +// INFER: %_ptr_UniformConstant_type_buffer_image_8 = OpTypePointer UniformConstant %type_buffer_image_8 RasterizerOrderedBuffer uint3rovbuf; RasterizerOrderedBuffer uint4rovbuf; -// CHECK: %type_buffer_image_9 = OpTypeImage %float Buffer 2 0 0 2 Unknown -// CHECK: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant %type_buffer_image_9 -// CHECK: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 +// INFER: %type_buffer_image_9 = OpTypeImage %float Buffer 2 0 0 2 Unknown +// INFER: %_ptr_UniformConstant_type_buffer_image_9 = OpTypePointer UniformConstant %type_buffer_image_9 +// INFER: %type_buffer_image_10 = OpTypeImage %float Buffer 2 0 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_buffer_image_10 = OpTypePointer UniformConstant %type_buffer_image_10 RasterizerOrderedBuffer float3rovbuf; RasterizerOrderedBuffer float4rovbuf; -// CHECK: %introvbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant -// CHECK: %uintrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant -// CHECK: %floatrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant -// CHECK: %int2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant -// CHECK: %uint2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant -// CHECK: %float2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant -// CHECK: %int3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant -// CHECK: %int4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant -// CHECK: %uint3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant -// CHECK: %uint4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant -// CHECK: %float3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_9 UniformConstant -// CHECK: %float4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant +// INFER: %introvbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// INFER: %uintrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// INFER: %floatrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// INFER: %int2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_2 UniformConstant +// INFER: %uint2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_3 UniformConstant +// INFER: %float2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_4 UniformConstant +// INFER: %int3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_5 UniformConstant +// INFER: %int4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_6 UniformConstant +// INFER: %uint3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_7 UniformConstant +// INFER: %uint4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_8 UniformConstant +// INFER: %float3rovbuf = OpVariable 
%_ptr_UniformConstant_type_buffer_image_9 UniformConstant +// INFER: %float4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_10 UniformConstant + +// UNKNOWN: %introvbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uintrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %floatrovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %int2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uint2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %float2rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %int3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %int4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image UniformConstant +// UNKNOWN: %uint3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %uint4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_0 UniformConstant +// UNKNOWN: %float3rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant +// UNKNOWN: %float4rovbuf = OpVariable %_ptr_UniformConstant_type_buffer_image_1 UniformConstant void main() {} diff --git a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl index 32dd76e6f1..651840b0e6 100644 --- a/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.rasterizer-ordered-texture.hlsl @@ -1,23 +1,27 @@ -// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER +// RUN: %dxc -fspv-use-unknown-image-format -T ps_6_6 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN // CHECK: OpCapability Image1D -// CHECK: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i +// INFER: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i +// UNKNOWN: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image -// CHECK: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui +// INFER: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui +// UNKNOWN: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image -// CHECK: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i +// INFER: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i +// UNKNOWN: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 Unknown // CHECK: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image -// CHECK: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 -// CHECK: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i -// CHECK: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array -// CHECK: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array -// CHECK: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 -// CHECK: 
%type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 +// INFER: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 +// INFER: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i +// INFER: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array +// INFER: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array +// INFER: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 +// INFER: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 // CHECK: %t1 = OpVariable %_ptr_UniformConstant_type_1d_image UniformConstant @@ -33,7 +37,8 @@ RasterizerOrderedTexture3D t3 ; [[vk::image_format("rgba32f")]] RasterizerOrderedTexture3D t4 ; -// CHECK: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_0 UniformConstant +// INFER: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_0 UniformConstant +// UNKNOWN: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_1 UniformConstant RasterizerOrderedTexture3D t5 ; // CHECK: %t6 = OpVariable %_ptr_UniformConstant_type_1d_image_array UniformConstant diff --git a/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl b/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl index f901d44cfa..44e7592869 100644 --- a/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl +++ b/tools/clang/test/CodeGenSPIRV/type.rwtexture.hlsl @@ -1,24 +1,43 @@ -// RUN: %dxc -T vs_6_0 -E main -fcgl %s -spirv | FileCheck %s +// RUN: %dxc -T vs_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,INFER +// RUN: %dxc -fspv-use-unknown-image-format -T vs_6_0 -E main -fcgl %s -spirv | FileCheck %s --check-prefixes=CHECK,UNKNOWN // CHECK: OpCapability Image1D -// CHECK: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i -// CHECK: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image -// CHECK: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image -// CHECK: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i -// CHECK: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image -// CHECK: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 -// CHECK: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i -// CHECK: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array -// CHECK: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui -// CHECK: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array -// CHECK: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 -// CHECK: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f -// CHECK: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant 
%type_2d_image_array_0 +// INFER: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 R32i +// INFER: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image +// INFER: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +// INFER: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 R32i +// INFER: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image +// INFER: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 +// INFER: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 R32i +// INFER: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array +// INFER: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Rg32ui +// INFER: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array +// INFER: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 +// INFER: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Rgba32f +// INFER: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 +// UNKNOWN: %type_1d_image = OpTypeImage %int 1D 2 0 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_1d_image = OpTypePointer UniformConstant %type_1d_image +// UNKNOWN: %type_2d_image = OpTypeImage %uint 2D 2 0 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_2d_image = OpTypePointer UniformConstant %type_2d_image +// UNKNOWN: %type_3d_image = OpTypeImage %int 3D 2 0 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_3d_image = OpTypePointer UniformConstant %type_3d_image +// UNKNOWN: %type_3d_image_0 = OpTypeImage %float 3D 2 0 0 2 Rgba32f +// UNKNOWN: %_ptr_UniformConstant_type_3d_image_0 = OpTypePointer UniformConstant %type_3d_image_0 +// UNKNOWN: %type_3d_image_1 = OpTypeImage %float 3D 2 0 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_3d_image_1 = OpTypePointer UniformConstant %type_3d_image_1 +// UNKNOWN: %type_1d_image_array = OpTypeImage %int 1D 2 1 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_1d_image_array = OpTypePointer UniformConstant %type_1d_image_array +// UNKNOWN: %type_2d_image_array = OpTypeImage %uint 2D 2 1 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_2d_image_array = OpTypePointer UniformConstant %type_2d_image_array +// UNKNOWN: %type_1d_image_array_0 = OpTypeImage %float 1D 2 1 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_1d_image_array_0 = OpTypePointer UniformConstant %type_1d_image_array_0 +// UNKNOWN: %type_2d_image_array_0 = OpTypeImage %float 2D 2 1 0 2 Unknown +// UNKNOWN: %_ptr_UniformConstant_type_2d_image_array_0 = OpTypePointer UniformConstant %type_2d_image_array_0 // CHECK: %t1 = OpVariable %_ptr_UniformConstant_type_1d_image UniformConstant RWTexture1D t1 ; @@ -33,7 +52,8 @@ RWTexture3D t3 ; [[vk::image_format("rgba32f")]] RWTexture3D t4 ; -// CHECK: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_0 UniformConstant +// INFER: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_0 UniformConstant +// UNKNOWN: %t5 = OpVariable %_ptr_UniformConstant_type_3d_image_1 UniformConstant RWTexture3D t5 ; // CHECK: %t6 = OpVariable %_ptr_UniformConstant_type_1d_image_array UniformConstant diff --git a/tools/clang/test/CodeGenSPIRV/vertex_shader_derivative_in_branch.hlsl 
b/tools/clang/test/CodeGenSPIRV/vertex_shader_derivative_in_branch.hlsl new file mode 100644 index 0000000000..9719dc1dc5 --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vertex_shader_derivative_in_branch.hlsl @@ -0,0 +1,23 @@ +// RUN: %dxc -T vs_6_0 -E main -DCOND=false -fspv-target-env=vulkan1.3 %s -spirv | FileCheck %s +// CHECK-NOT: OpCapability DerivativeControl +// CHECK-NOT: OpExtension "SPV_KHR_compute_shader_derivatives" + +// RUN: not %dxc -T vs_6_0 -E main -DCOND=true -fspv-target-env=vulkan1.3 %s -spirv 2>&1 | FileCheck %s -check-prefix=ERROR +// ERROR: generated SPIR-V is invalid: +// ERROR-NEXT: Derivative instructions require Fragment, GLCompute, MeshEXT or TaskEXT execution model: DPdx + +struct VSOut +{ + float4 pos : SV_Position; +}; + +VSOut main(float4 pos : POSITION) +{ + VSOut output; + output.pos = pos; + if (COND) + { + output.pos += ddx(pos); + } + return output; +} diff --git a/tools/clang/test/CodeGenSPIRV/vk.attribute.image-format.hlsl b/tools/clang/test/CodeGenSPIRV/vk.attribute.image-format.hlsl index 12b03fffda..4d10dc446b 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.attribute.image-format.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.attribute.image-format.hlsl @@ -60,12 +60,6 @@ RWBuffer Buf_r64i; [[vk::image_format("r64ui")]] RWBuffer Buf_r64ui; -[[vk::image_format("r16f")]] -// CHECK: [[ImgType:%[0-9a-zA-Z_]+]] = OpTypeImage %float 2D 2 0 0 2 R16f -// CHECK: [[ArrayType:%[0-9a-zA-Z_]+]] = OpTypeRuntimeArray [[ImgType]] -// CHECK: [[PtrType:%[0-9a-zA-Z_]+]] = OpTypePointer UniformConstant [[ArrayType]] -RWTexture2D Buf_r16f_bindless[]; - struct S { RWBuffer b; }; diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl index f0f5c54a16..e063a4bc23 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.cs.hlsl @@ -20,7 +20,7 @@ void main() { foo(rwbuf[0].Get()); } -// CHECK: [[L0:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} %{{[_0-9A-Za-z]*}} Aligned 8 +// CHECK: [[L0:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} %{{[_0-9A-Za-z]*}} // CHECK: [[L1:%[_0-9A-Za-z]*]] = OpLoad %{{[_0-9A-Za-z]*}} [[L0]] Aligned 8 // CHECK: [[L2:%[_0-9A-Za-z]*]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[L1]] %int_0 // CHECK: OpStore [[L2]] %int_1 Aligned 4 diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl index fc5b9edad0..e159f6997c 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.alias.hlsl @@ -62,10 +62,10 @@ float4 MainPs(void) : SV_Target0 // CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[X3]] // CHECK: OpStore [[BP1]] [[X4]] // CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[VTEST]] -// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP0]] Aligned 16 +// CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP0]] // CHECK: [[X7:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X6]] [[I1]] // CHECK: OpStore [[X7]] [[X5]] Aligned 16 -// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP1]] Aligned 16 +// CHECK: [[X8:%[_0-9A-Za-z]*]] = OpLoad [[PGS]] [[BP1]] // CHECK: [[X9:%[_0-9A-Za-z]*]] = OpAccessChain [[PBV4FLOAT]] [[X8]] [[I1]] // CHECK: [[X10:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X9]] Aligned 16 // CHECK: OpReturnValue [[X10]] diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl 
index 992d8b39fd..485da6fd93 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.atomic.hlsl @@ -29,7 +29,7 @@ void main() uint u0, u1; // CHECK: [[X1:%[_0-9]+]] = OpAccessChain %{{[_0-9A-Za-z]*}} [[PC]] [[I0]] -// CHECK: [[X2:%[_0-9]+]] = OpLoad [[PS]] [[X1]] Aligned 4 +// CHECK: [[X2:%[_0-9]+]] = OpLoad [[PS]] [[X1]] // CHECK: [[X3:%[_0-9]+]] = OpAccessChain [[PU]] [[X2]] [[I0]] // CHECK: [[X4:%[_0-9]+]] = OpLoad [[UINT]] [[IN]] // CHECK: [[X5:%[_0-9]+]] = OpAtomicExchange [[UINT]] [[X3]] [[U1]] [[U0]] [[X4]] diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl index b44e1eca09..e7908e0ce7 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.from-uint.hlsl @@ -37,8 +37,8 @@ void main() { // CHECK: [[TEST:%[_0-9A-Za-z]*]] = OpVariable [[PFPPUINT]] Function // CHECK: [[X1:%[_0-9A-Za-z]*]] = OpConvertUToPtr [[PPUINT]] // CHECK: OpStore [[TEST]] [[X1]] -// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PPUINT]] [[TEST]] Aligned 32 -// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[UINT]] [[X2]] Aligned 4 +// CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PPUINT]] [[TEST]] +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[UINT]] [[X2]] Aligned 32 // CHECK: [[X4:%[_0-9A-Za-z]*]] = OpAccessChain [[PUUINT]] [[OUTPUT]] [[I0]] [[U0]] // CHECK: OpStore [[X4]] [[X3]] // CHECK: OpReturn diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl index 71fee1a795..75380d3f4e 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.linked-list.hlsl @@ -76,9 +76,9 @@ float4 MainPs(void) : SV_Target0 // CHECK: [[X1:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK1]] [[GPC]] [[S0]] // CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X1]] // CHECK: OpStore [[GP]] [[X2]] -// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] Aligned 32 +// CHECK: [[X3:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] // CHECK: [[X4:%[_0-9A-Za-z]*]] = OpAccessChain [[PPBLOCK2]] [[X3]] [[S1]] -// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X4]] Aligned 8 +// CHECK: [[X5:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[X4]] Aligned 32 // CHECK: OpStore [[GP]] [[X5]] // CHECK: [[X6:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] // CHECK: [[X7:%[_0-9A-Za-z]*]] = OpConvertPtrToU [[ULONG]] [[X6]] @@ -94,7 +94,7 @@ float4 MainPs(void) : SV_Target0 // CHECK: [[IF_TRUE]] = OpLabel // CHECK: OpReturnValue [[CV4FLOAT]] // CHECK: [[IF_MERGE]] = OpLabel -// CHECK: [[X13:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] Aligned 32 +// CHECK: [[X13:%[_0-9A-Za-z]*]] = OpLoad [[PBLOCK]] [[GP]] // CHECK: [[X14:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X13]] [[S0]] // CHECK: [[X15:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X14]] Aligned 16 // CHECK: OpReturnValue [[X15]] diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl index c7d6f0ed2b..cc3b1a0209 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.read.hlsl @@ -36,7 +36,8 @@ struct TestPushConstant_t float4 MainPs(void) : SV_Target0 { float4 vTest = g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4; - return vTest; + float f = vk::BufferPointer(0xdeadbeefull).Get(); + return 
vTest+f; } // CHECK: [[FUN]] = OpFunction @@ -44,5 +45,9 @@ float4 MainPs(void) : SV_Target0 // CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] // CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] // CHECK: [[X4:%[_0-9A-Za-z]*]] = OpLoad [[V4FLOAT]] [[X3]] Aligned 16 -// CHECK: OpStore [[OUT]] [[X4]] +// CHECK: [[TEMP_PTR:%[_0-9A-Za-z]*]] = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_float %ulong_3735928559 +// CHECK: [[LD:%[_0-9A-Za-z]*]] = OpLoad %float [[TEMP_PTR]] Aligned 4 +// CHECK: [[CONSTRUCT:%[_0-9A-Za-z]*]] = OpCompositeConstruct [[V4FLOAT]] [[LD]] [[LD]] [[LD]] [[LD]] +// CHECK: [[ADD:%[_0-9A-Za-z]*]] = OpFAdd [[V4FLOAT]] [[X4]] [[CONSTRUCT]] +// CHECK: OpStore [[OUT]] [[ADD]] // CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl index 930770cc16..5132c57000 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.rvalue.hlsl @@ -1,4 +1,5 @@ -// RUN: %dxc -spirv -HV 202x -Od -T cs_6_9 %s | FileCheck %s +// RUN: %dxc -spirv -HV 202x -Od -T cs_6_9 %s | FileCheck %s --check-prefix=CHECK --check-prefix=NOFUN +// RUN: %dxc -spirv -HV 202x -Od -T cs_6_9 -DFUN %s | FileCheck %s --check-prefix=CHECK --check-prefix=FUN // Issue #7302: implicit object argument of Get() evaluates to rvalue @@ -20,16 +21,45 @@ struct Content // CHECK: [[V2UINT:%[_0-9A-Za-z]*]] = OpTypeVector [[UINT]] 2 // CHECK: [[VECTOR:%[_0-9A-Za-z]*]] = OpConstantComposite [[V2UINT]] [[UDEADBEEF]] [[U0]] // CHECK: [[CONTENT:%[_0-9A-Za-z]*]] = OpTypeStruct [[INT]] -// CHECK: [[PPCONTENT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[CONTENT]] -// CHECK: [[PPINT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[INT]] +// FUN: [[PFCONTENT:%[_0-9A-Za-z]*]] = OpTypePointer Function [[CONTENT]] +// FUN: [[PFINT:%[_0-9A-Za-z]*]] = OpTypePointer Function [[INT]] +// FUN: [[CONTENT0:%[_0-9A-Za-z]*]] = OpTypeStruct [[INT]] +// FUN: [[PPCONTENT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[CONTENT0]] +// NOFUN: [[PPCONTENT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[CONTENT]] +// NOFUN: [[PPINT:%[_0-9A-Za-z]*]] = OpTypePointer PhysicalStorageBuffer [[INT]] + +Content f() { + return bitcast >(uint32_t2(0xdeadbeefu,0x0u)).Get(); +} [numthreads(1, 1, 1)] void main() { +#ifdef FUN + Content c = f(); + c.a = 1; +#else bitcast >(uint32_t2(0xdeadbeefu,0x0u)).Get().a = 1; +#endif } -// CHECK: [[BITCAST:%[0-9]*]] = OpBitcast [[PPCONTENT]] [[VECTOR]] -// CHECK: [[PTR:%[0-9]*]] = OpAccessChain [[PPINT]] [[BITCAST]] [[IO]] -// CHECK: OpStore [[PTR]] [[I1]] Aligned 4 +// NOFUN: [[BITCAST:%[0-9]*]] = OpBitcast [[PPCONTENT]] [[VECTOR]] +// NOFUN: [[PTR:%[0-9]*]] = OpAccessChain [[PPINT]] [[BITCAST]] [[IO]] +// NOFUN: OpStore [[PTR]] [[I1]] Aligned 4 + +// FUN: [[VAR:%[_0-9A-Za-z]*]] = OpVariable [[PFCONTENT]] Function +// FUN: [[CALL:%[0-9]*]] = OpFunctionCall [[CONTENT]] [[F:%[_0-9A-Za-z]*]] +// FUN: OpStore [[VAR]] [[CALL]] +// FUN: [[PTR:%[0-9]*]] = OpAccessChain [[PFINT]] [[VAR]] [[IO]] +// FUN: OpStore [[PTR]] [[I1]] + +// FUN: [[F]] = OpFunction [[CONTENT]] +// FUN: [[VAR:%[_0-9A-Za-z]*]] = OpVariable [[PFCONTENT]] Function +// FUN: [[BITCAST:%[0-9]*]] = OpBitcast [[PPCONTENT]] [[VECTOR]] +// FUN: [[CVAL0:%[0-9]*]] = OpLoad [[CONTENT0]] [[BITCAST]] Aligned 4 +// FUN: [[IVAL:%[0-9]*]] = OpCompositeExtract [[INT]] [[CVAL0]] 0 +// FUN: [[CVAL1:%[0-9]*]] = OpCompositeConstruct [[CONTENT]] [[IVAL]] 
+// FUN: OpStore [[VAR]] [[CVAL1]] +// FUN: [[RET:%[0-9]*]] = OpLoad [[CONTENT]] [[VAR]] +// FUN: OpReturnValue [[RET]] diff --git a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl index b2efd02cbd..843815a4a0 100644 --- a/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl +++ b/tools/clang/test/CodeGenSPIRV/vk.buffer-pointer.write.hlsl @@ -40,6 +40,7 @@ float4 MainPs(void) : SV_Target0 { float4 vTest = float4(1.0,0.0,0.0,0.0); g_PushConstants.m_nBufferDeviceAddress.Get().g_vTestFloat4 = vTest; + vk::BufferPointer(0xdeadbeefull).Get() = 4.5f; return vTest; } @@ -48,5 +49,7 @@ float4 MainPs(void) : SV_Target0 // CHECK: [[X2:%[_0-9A-Za-z]*]] = OpLoad [[PGLOBALS]] [[X1]] // CHECK: [[X3:%[_0-9A-Za-z]*]] = OpAccessChain [[PV4FLOAT2]] [[X2]] [[S1]] // CHECK: OpStore [[X3]] [[CV4FLOAT]] Aligned 16 +// CHECK: [[TEMP_PTR:%[_0-9A-Za-z]*]] = OpConvertUToPtr %_ptr_PhysicalStorageBuffer_float %ulong_3735928559 +// CHECK: OpStore [[TEMP_PTR]] %float_4_5 Aligned 4 // CHECK: OpStore [[OUT]] [[CV4FLOAT]] // CHECK: OpFunctionEnd diff --git a/tools/clang/test/CodeGenSPIRV/vk.cloption.invert-y.lib.hlsl b/tools/clang/test/CodeGenSPIRV/vk.cloption.invert-y.lib.hlsl new file mode 100644 index 0000000000..6dac20fc6f --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/vk.cloption.invert-y.lib.hlsl @@ -0,0 +1,12 @@ +// RUN: %dxc -T lib_6_3 -fvk-invert-y -fcgl %s -spirv | FileCheck %s + +[shader("vertex")] +float4 main(float4 a : A) : SV_Position { + return a; +} + +// CHECK: [[a:%[0-9]+]] = OpFunctionCall %v4float %src_main %param_var_a +// CHECK-NEXT: [[oldY:%[0-9]+]] = OpCompositeExtract %float [[a]] 1 +// CHECK-NEXT: [[newY:%[0-9]+]] = OpFNegate %float [[oldY]] +// CHECK-NEXT: [[pos:%[0-9]+]] = OpCompositeInsert %v4float [[newY]] [[a]] 1 +// CHECK-NEXT: OpStore %gl_Position [[pos]] diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_accessors_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_accessors_dxilgen.ll new file mode 100644 index 0000000000..4fc6a47780 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_accessors_dxilgen.ll @@ -0,0 +1,687 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; outbuf UAV byte r/w U0u4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RWByteAddressBuffer = type { i32 } +%dx.types.HitObject = type { i8* } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.dx::HitObject" = type { i32 } + +@"\01?outbuf@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4 + +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject %{{[^ ]+}}, i32 1) +; CHECK: %{{[^ ]+}} = call i1 @dx.op.hitObject_StateScalar.i1(i32 270, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i1 @dx.op.hitObject_StateScalar.i1(i32 269, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i1 @dx.op.hitObject_StateScalar.i1(i32 271, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 281, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i32 
@dx.op.hitObject_StateScalar.i32(i32 285, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 282, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 283, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 284, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i32 @dx.op.hitObject_StateScalar.i32(i32 286, %dx.types.HitObject %{{[^ ]+}}) +; CHECK: %{{[^ ]+}} = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject %{{[^ ]+}}, i32 42) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %{{[^ ]+}}, i32 0) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %{{[^ ]+}}, i32 1) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %{{[^ ]+}}, i32 2) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %{{[^ ]+}}, i32 0) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %{{[^ ]+}}, i32 1) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %{{[^ ]+}}, i32 2) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %{{[^ ]+}}, i32 0) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %{{[^ ]+}}, i32 1) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %{{[^ ]+}}, i32 2) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %{{[^ ]+}}, i32 0) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %{{[^ ]+}}, i32 1) +; CHECK: %{{[^ ]+}} = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %{{[^ ]+}}, i32 2) + +; CHECK: %[[M34OW00:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO:[^ ]+]], i32 0, i32 0) +; CHECK-NEXT: %[[M34VOW0:[^ ]+]] = insertelement <12 x float> undef, float %[[M34OW00]], i64 0 +; CHECK-NEXT: %[[M34OW01:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 0, i32 1) +; CHECK-NEXT: %[[M34VOW1:[^ ]+]] = insertelement <12 x float> %[[M34VOW0]], float %[[M34OW01]], i64 1 +; CHECK-NEXT: %[[M34OW02:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 0, i32 2) +; CHECK-NEXT: %[[M34VOW2:[^ ]+]] = insertelement <12 x float> %[[M34VOW1]], float %[[M34OW02]], i64 2 +; CHECK-NEXT: %[[M34OW03:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 0, i32 3) +; CHECK-NEXT: %[[M34VOW3:[^ ]+]] = insertelement <12 x float> %[[M34VOW2]], float %[[M34OW03]], i64 3 +; CHECK-NEXT: %[[M34OW10:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 1, i32 0) +; CHECK-NEXT: %[[M34VOW4:[^ ]+]] = insertelement <12 x float> %[[M34VOW3]], float %[[M34OW10]], i64 4 +; CHECK-NEXT: %[[M34OW11:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 1, i32 1) +; CHECK-NEXT: %[[M34VOW5:[^ ]+]] = insertelement <12 x float> %[[M34VOW4]], float %[[M34OW11]], i64 5 +; CHECK-NEXT: %[[M34OW12:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject 
%[[M34OWHO]], i32 1, i32 2) +; CHECK-NEXT: %[[M34VOW6:[^ ]+]] = insertelement <12 x float> %[[M34VOW5]], float %[[M34OW12]], i64 6 +; CHECK-NEXT: %[[M34OW13:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 1, i32 3) +; CHECK-NEXT: %[[M34VOW7:[^ ]+]] = insertelement <12 x float> %[[M34VOW6]], float %[[M34OW13]], i64 7 +; CHECK-NEXT: %[[M34OW20:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 2, i32 0) +; CHECK-NEXT: %[[M34VOW8:[^ ]+]] = insertelement <12 x float> %[[M34VOW7]], float %[[M34OW20]], i64 8 +; CHECK-NEXT: %[[M34OW21:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 2, i32 1) +; CHECK-NEXT: %[[M34VOW9:[^ ]+]] = insertelement <12 x float> %[[M34VOW8]], float %[[M34OW21]], i64 9 +; CHECK-NEXT: %[[M34OW22:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 2, i32 2) +; CHECK-NEXT: %[[M34VOW10:[^ ]+]] = insertelement <12 x float> %[[M34VOW9]], float %[[M34OW22]], i64 10 +; CHECK-NEXT: %[[M34OW23:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M34OWHO]], i32 2, i32 3) +; CHECK-NEXT: %{{[^ ]+}} = insertelement <12 x float> %[[M34VOW10]], float %[[M34OW23]], i64 11 + +; CHECK: %[[M43OW00:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO:[^ ]+]], i32 0, i32 0) +; CHECK-NEXT: %[[M43VOW0:[^ ]+]] = insertelement <12 x float> undef, float %[[M43OW00]], i64 0 +; CHECK-NEXT: %[[M43OW10:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 1, i32 0) +; CHECK-NEXT: %[[M43VOW1:[^ ]+]] = insertelement <12 x float> %[[M43VOW0]], float %[[M43OW10]], i64 1 +; CHECK-NEXT: %[[M43OW20:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 2, i32 0) +; CHECK-NEXT: %[[M43VOW2:[^ ]+]] = insertelement <12 x float> %[[M43VOW1]], float %[[M43OW20]], i64 2 +; CHECK-NEXT: %[[M43OW01:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 0, i32 1) +; CHECK-NEXT: %[[M43VOW3:[^ ]+]] = insertelement <12 x float> %[[M43VOW2]], float %[[M43OW01]], i64 3 +; CHECK-NEXT: %[[M43OW11:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 1, i32 1) +; CHECK-NEXT: %[[M43VOW4:[^ ]+]] = insertelement <12 x float> %[[M43VOW3]], float %[[M43OW11]], i64 4 +; CHECK-NEXT: %[[M43OW21:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 2, i32 1) +; CHECK-NEXT: %[[M43VOW5:[^ ]+]] = insertelement <12 x float> %[[M43VOW4]], float %[[M43OW21]], i64 5 +; CHECK-NEXT: %[[M43OW02:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 0, i32 2) +; CHECK-NEXT: %[[M43VOW6:[^ ]+]] = insertelement <12 x float> %[[M43VOW5]], float %[[M43OW02]], i64 6 +; CHECK-NEXT: %[[M43OW12:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 1, i32 2) +; CHECK-NEXT: %[[M43VOW7:[^ ]+]] = insertelement <12 x float> %[[M43VOW6]], float %[[M43OW12]], i64 7 +; CHECK-NEXT: %[[M43OW22:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 2, i32 2) +; CHECK-NEXT: %[[M43VOW8:[^ ]+]] = insertelement <12 x float> %[[M43VOW7]], float %[[M43OW22]], i64 8 +; CHECK-NEXT: %[[M43OW03:[^ ]+]] = call float 
@dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 0, i32 3) +; CHECK-NEXT: %[[M43VOW9:[^ ]+]] = insertelement <12 x float> %[[M43VOW8]], float %[[M43OW03]], i64 9 +; CHECK-NEXT: %[[M43OW13:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 1, i32 3) +; CHECK-NEXT: %[[M43VOW10:[^ ]+]] = insertelement <12 x float> %[[M43VOW9]], float %[[M43OW13]], i64 10 +; CHECK-NEXT: %[[M43OW23:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %[[M43OWHO]], i32 2, i32 3) +; CHECK-NEXT: %{{[^ ]+}} = insertelement <12 x float> %[[M43VOW10]], float %[[M43OW23]], i64 11 + +; CHECK: %[[M34WO00:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO:[^ ]+]], i32 0, i32 0) +; CHECK-NEXT: %[[M34VWO0:[^ ]+]] = insertelement <12 x float> undef, float %[[M34WO00]], i64 0 +; CHECK-NEXT: %[[M34WO01:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 0, i32 1) +; CHECK-NEXT: %[[M34VWO1:[^ ]+]] = insertelement <12 x float> %[[M34VWO0]], float %[[M34WO01]], i64 1 +; CHECK-NEXT: %[[M34WO02:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 0, i32 2) +; CHECK-NEXT: %[[M34VWO2:[^ ]+]] = insertelement <12 x float> %[[M34VWO1]], float %[[M34WO02]], i64 2 +; CHECK-NEXT: %[[M34WO03:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 0, i32 3) +; CHECK-NEXT: %[[M34VWO3:[^ ]+]] = insertelement <12 x float> %[[M34VWO2]], float %[[M34WO03]], i64 3 +; CHECK-NEXT: %[[M34WO10:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 1, i32 0) +; CHECK-NEXT: %[[M34VWO4:[^ ]+]] = insertelement <12 x float> %[[M34VWO3]], float %[[M34WO10]], i64 4 +; CHECK-NEXT: %[[M34WO11:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 1, i32 1) +; CHECK-NEXT: %[[M34VWO5:[^ ]+]] = insertelement <12 x float> %[[M34VWO4]], float %[[M34WO11]], i64 5 +; CHECK-NEXT: %[[M34WO12:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 1, i32 2) +; CHECK-NEXT: %[[M34VWO6:[^ ]+]] = insertelement <12 x float> %[[M34VWO5]], float %[[M34WO12]], i64 6 +; CHECK-NEXT: %[[M34WO13:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 1, i32 3) +; CHECK-NEXT: %[[M34VWO7:[^ ]+]] = insertelement <12 x float> %[[M34VWO6]], float %[[M34WO13]], i64 7 +; CHECK-NEXT: %[[M34WO20:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 2, i32 0) +; CHECK-NEXT: %[[M34VWO8:[^ ]+]] = insertelement <12 x float> %[[M34VWO7]], float %[[M34WO20]], i64 8 +; CHECK-NEXT: %[[M34WO21:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 2, i32 1) +; CHECK-NEXT: %[[M34VWO9:[^ ]+]] = insertelement <12 x float> %[[M34VWO8]], float %[[M34WO21]], i64 9 +; CHECK-NEXT: %[[M34WO22:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 2, i32 2) +; CHECK-NEXT: %[[M34VWO10:[^ ]+]] = insertelement <12 x float> %[[M34VWO9]], float %[[M34WO22]], i64 10 +; CHECK-NEXT: %[[M34WO23:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M34WOHO]], i32 2, i32 3) +; CHECK-NEXT: %{{[^ ]+}} = insertelement <12 x float> %[[M34VWO10]], float %[[M34WO23]], i64 11 + +; CHECK: 
%[[M43WO00:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO:[^ ]+]], i32 0, i32 0) +; CHECK-NEXT: %[[M43VWO0:[^ ]+]] = insertelement <12 x float> undef, float %[[M43WO00]], i64 0 +; CHECK-NEXT: %[[M43WO10:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 1, i32 0) +; CHECK-NEXT: %[[M43VWO1:[^ ]+]] = insertelement <12 x float> %[[M43VWO0]], float %[[M43WO10]], i64 1 +; CHECK-NEXT: %[[M43WO20:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 2, i32 0) +; CHECK-NEXT: %[[M43VWO2:[^ ]+]] = insertelement <12 x float> %[[M43VWO1]], float %[[M43WO20]], i64 2 +; CHECK-NEXT: %[[M43WO01:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 0, i32 1) +; CHECK-NEXT: %[[M43VWO3:[^ ]+]] = insertelement <12 x float> %[[M43VWO2]], float %[[M43WO01]], i64 3 +; CHECK-NEXT: %[[M43WO11:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 1, i32 1) +; CHECK-NEXT: %[[M43VWO4:[^ ]+]] = insertelement <12 x float> %[[M43VWO3]], float %[[M43WO11]], i64 4 +; CHECK-NEXT: %[[M43WO21:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 2, i32 1) +; CHECK-NEXT: %[[M43VWO5:[^ ]+]] = insertelement <12 x float> %[[M43VWO4]], float %[[M43WO21]], i64 5 +; CHECK-NEXT: %[[M43WO02:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 0, i32 2) +; CHECK-NEXT: %[[M43VWO6:[^ ]+]] = insertelement <12 x float> %[[M43VWO5]], float %[[M43WO02]], i64 6 +; CHECK-NEXT: %[[M43WO12:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 1, i32 2) +; CHECK-NEXT: %[[M43VWO7:[^ ]+]] = insertelement <12 x float> %[[M43VWO6]], float %[[M43WO12]], i64 7 +; CHECK-NEXT: %[[M43WO22:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 2, i32 2) +; CHECK-NEXT: %[[M43VWO8:[^ ]+]] = insertelement <12 x float> %[[M43VWO7]], float %[[M43WO22]], i64 8 +; CHECK-NEXT: %[[M43WO03:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 0, i32 3) +; CHECK-NEXT: %[[M43VWO9:[^ ]+]] = insertelement <12 x float> %[[M43VWO8]], float %[[M43WO03]], i64 9 +; CHECK-NEXT: %[[M43WO13:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 1, i32 3) +; CHECK-NEXT: %[[M43VWO10:[^ ]+]] = insertelement <12 x float> %[[M43VWO9]], float %[[M43WO13]], i64 10 +; CHECK-NEXT: %[[M43WO23:[^ ]+]] = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %[[M43WOHO]], i32 2, i32 3) +; CHECK-NEXT: %{{[^ ]+}} = insertelement <12 x float> %[[M43VWO10]], float %[[M43WO23]], i64 11 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +for.body.i.lr.ph: + %0 = alloca [12 x float] + %1 = alloca [3 x i32] + %2 = alloca [12 x float] + %3 = alloca [4 x i32] + %4 = alloca [12 x float] + %5 = alloca [3 x i32] + %6 = alloca [12 x float] + %7 = alloca [4 x i32] + %hit = alloca %dx.types.HitObject, align 4 + %8 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !19 ; line:69 col:3 + call void @llvm.lifetime.start(i64 4, i8* %8) #0, !dbg !19 ; line:69 col:3 + %9 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !23 ; line:69 col:17 + call void @"dx.hl.op..void (i32, 
%dx.types.HitObject*, i32)"(i32 388, %dx.types.HitObject* %hit, i32 1), !dbg !24 ; line:75 col:3 + %10 = call i1 @"dx.hl.op.rn.i1 (i32, %dx.types.HitObject*)"(i32 383, %dx.types.HitObject* %hit), !dbg !25 ; line:80 col:11 + %conv = zext i1 %10 to i32, !dbg !25 ; line:80 col:11 + %11 = call i1 @"dx.hl.op.rn.i1 (i32, %dx.types.HitObject*)"(i32 384, %dx.types.HitObject* %hit), !dbg !26 ; line:81 col:11 + %conv3 = zext i1 %11 to i32, !dbg !26 ; line:81 col:11 + %add4 = add nsw i32 %conv, %conv3, !dbg !27 ; line:81 col:8 + %12 = call i1 @"dx.hl.op.rn.i1 (i32, %dx.types.HitObject*)"(i32 385, %dx.types.HitObject* %hit), !dbg !28 ; line:82 col:11 + %conv6 = zext i1 %12 to i32, !dbg !28 ; line:82 col:11 + %add7 = add nsw i32 %add4, %conv6, !dbg !29 ; line:82 col:8 + %13 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 365, %dx.types.HitObject* %hit), !dbg !30 ; line:85 col:11 + %add9 = add i32 %add7, %13, !dbg !31 ; line:85 col:8 + %14 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 366, %dx.types.HitObject* %hit), !dbg !32 ; line:86 col:11 + %add11 = add i32 %add9, %14, !dbg !33 ; line:86 col:8 + %15 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 368, %dx.types.HitObject* %hit), !dbg !34 ; line:87 col:11 + %add13 = add i32 %add11, %15, !dbg !35 ; line:87 col:8 + %16 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 367, %dx.types.HitObject* %hit), !dbg !36 ; line:88 col:11 + %add15 = add i32 %add13, %16, !dbg !37 ; line:88 col:8 + %17 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 373, %dx.types.HitObject* %hit), !dbg !38 ; line:89 col:11 + %add17 = add i32 %add15, %17, !dbg !39 ; line:89 col:8 + %18 = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 377, %dx.types.HitObject* %hit), !dbg !40 ; line:90 col:11 + %add19 = add i32 %add17, %18, !dbg !41 ; line:90 col:8 + %19 = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.HitObject*, i32)"(i32 386, %dx.types.HitObject* %hit, i32 42), !dbg !42 ; line:91 col:11 + %add21 = add i32 %add19, %19, !dbg !43 ; line:91 col:8 + %20 = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 379, %dx.types.HitObject* %hit), !dbg !44 ; line:94 col:11 + %add23 = fadd <3 x float> zeroinitializer, %20, !dbg !45 ; line:94 col:8 + %21 = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 378, %dx.types.HitObject* %hit), !dbg !46 ; line:95 col:11 + %add25 = fadd <3 x float> %add23, %21, !dbg !47 ; line:95 col:8 + %22 = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 370, %dx.types.HitObject* %hit), !dbg !48 ; line:96 col:11 + %add27 = fadd <3 x float> %add25, %22, !dbg !49 ; line:96 col:8 + %23 = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 369, %dx.types.HitObject* %hit), !dbg !50 ; line:97 col:11 + %add29 = fadd <3 x float> %add27, %23, !dbg !51 ; line:97 col:8 + %vsum.0.vec.extract = extractelement <3 x float> %add29, i32 0, !dbg !52 ; line:98 col:11 + %vsum.4.vec.extract = extractelement <3 x float> %add29, i32 1, !dbg !53 ; line:98 col:21 + %add30 = fadd float %vsum.0.vec.extract, %vsum.4.vec.extract, !dbg !54 ; line:98 col:19 + %vsum.8.vec.extract = extractelement <3 x float> %add29, i32 2, !dbg !55 ; line:98 col:31 + %add31 = fadd float %add30, %vsum.8.vec.extract, !dbg !56 ; line:98 col:29 + %add32 = fadd float 0.000000e+00, %add31, !dbg !57 ; line:98 col:8 + %24 = call <12 x float> @"dx.hl.op.rn.<12 x float> (i32, %dx.types.HitObject*)"(i32 371, %dx.types.HitObject* %hit), !dbg !58 ; 
line:101 col:23 + %row2col = shufflevector <12 x float> %24, <12 x float> %24, <12 x i32> , !dbg !59 ; line:101 col:11 + br label %for.body.7.i.lr.ph, !dbg !60 ; line:61 col:3 + +for.body.7.i.lr.ph: ; preds = %for.cond.cleanup.6.i, %for.body.i.lr.ph + %i.i.0 = phi i32 [ 0, %for.body.i.lr.ph ], [ %inc9.i, %for.cond.cleanup.6.i ] + %h.i.0 = phi float [ 0.000000e+00, %for.body.i.lr.ph ], [ %add.i, %for.cond.cleanup.6.i ] + br label %for.body.7.i, !dbg !63 ; line:62 col:5 + +for.cond.cleanup.6.i: ; preds = %for.body.7.i + %inc9.i = add nsw i32 %i.i.0, 1, !dbg !64 ; line:61 col:26 + %cmp.i = icmp slt i32 %inc9.i, 3, !dbg !65 ; line:61 col:21 + br i1 %cmp.i, label %for.body.7.i.lr.ph, label %for.body.i.8.lr.ph, !dbg !60 ; line:61 col:3 + +for.body.7.i: ; preds = %for.body.7.i.lr.ph, %for.body.7.i + %h.i.263 = phi float [ %h.i.0, %for.body.7.i.lr.ph ], [ %add.i, %for.body.7.i ] + %j.i.0 = phi i32 [ 0, %for.body.7.i.lr.ph ], [ %inc.i, %for.body.7.i ] + %25 = add i32 3, %i.i.0, !dbg !66 ; line:63 col:12 + %26 = add i32 6, %i.i.0, !dbg !66 ; line:63 col:12 + %27 = add i32 9, %i.i.0, !dbg !66 ; line:63 col:12 + %28 = getelementptr [4 x i32], [4 x i32]* %7, i32 0, i32 0, !dbg !66 ; line:63 col:12 + store i32 %i.i.0, i32* %28, !dbg !66 ; line:63 col:12 + %29 = getelementptr [4 x i32], [4 x i32]* %7, i32 0, i32 1, !dbg !66 ; line:63 col:12 + store i32 %25, i32* %29, !dbg !66 ; line:63 col:12 + %30 = getelementptr [4 x i32], [4 x i32]* %7, i32 0, i32 2, !dbg !66 ; line:63 col:12 + store i32 %26, i32* %30, !dbg !66 ; line:63 col:12 + %31 = getelementptr [4 x i32], [4 x i32]* %7, i32 0, i32 3, !dbg !66 ; line:63 col:12 + store i32 %27, i32* %31, !dbg !66 ; line:63 col:12 + %32 = getelementptr [4 x i32], [4 x i32]* %7, i32 0, i32 %j.i.0, !dbg !66 ; line:63 col:12 + %33 = load i32, i32* %32, !dbg !66 ; line:63 col:12 + %34 = extractelement <12 x float> %row2col, i64 0, !dbg !66 ; line:63 col:12 + %35 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 0, !dbg !66 ; line:63 col:12 + store float %34, float* %35, !dbg !66 ; line:63 col:12 + %36 = extractelement <12 x float> %row2col, i64 1, !dbg !66 ; line:63 col:12 + %37 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 1, !dbg !66 ; line:63 col:12 + store float %36, float* %37, !dbg !66 ; line:63 col:12 + %38 = extractelement <12 x float> %row2col, i64 2, !dbg !66 ; line:63 col:12 + %39 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 2, !dbg !66 ; line:63 col:12 + store float %38, float* %39, !dbg !66 ; line:63 col:12 + %40 = extractelement <12 x float> %row2col, i64 3, !dbg !66 ; line:63 col:12 + %41 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 3, !dbg !66 ; line:63 col:12 + store float %40, float* %41, !dbg !66 ; line:63 col:12 + %42 = extractelement <12 x float> %row2col, i64 4, !dbg !66 ; line:63 col:12 + %43 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 4, !dbg !66 ; line:63 col:12 + store float %42, float* %43, !dbg !66 ; line:63 col:12 + %44 = extractelement <12 x float> %row2col, i64 5, !dbg !66 ; line:63 col:12 + %45 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 5, !dbg !66 ; line:63 col:12 + store float %44, float* %45, !dbg !66 ; line:63 col:12 + %46 = extractelement <12 x float> %row2col, i64 6, !dbg !66 ; line:63 col:12 + %47 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 6, !dbg !66 ; line:63 col:12 + store float %46, float* %47, !dbg !66 ; line:63 col:12 + %48 = extractelement <12 x float> %row2col, i64 7, !dbg !66 ; line:63 col:12 + %49 = getelementptr [12 x float], 
[12 x float]* %6, i32 0, i32 7, !dbg !66 ; line:63 col:12 + store float %48, float* %49, !dbg !66 ; line:63 col:12 + %50 = extractelement <12 x float> %row2col, i64 8, !dbg !66 ; line:63 col:12 + %51 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 8, !dbg !66 ; line:63 col:12 + store float %50, float* %51, !dbg !66 ; line:63 col:12 + %52 = extractelement <12 x float> %row2col, i64 9, !dbg !66 ; line:63 col:12 + %53 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 9, !dbg !66 ; line:63 col:12 + store float %52, float* %53, !dbg !66 ; line:63 col:12 + %54 = extractelement <12 x float> %row2col, i64 10, !dbg !66 ; line:63 col:12 + %55 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 10, !dbg !66 ; line:63 col:12 + store float %54, float* %55, !dbg !66 ; line:63 col:12 + %56 = extractelement <12 x float> %row2col, i64 11, !dbg !66 ; line:63 col:12 + %57 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 11, !dbg !66 ; line:63 col:12 + store float %56, float* %57, !dbg !66 ; line:63 col:12 + %58 = getelementptr [12 x float], [12 x float]* %6, i32 0, i32 %33, !dbg !66 ; line:63 col:12 + %59 = load float, float* %58, !dbg !66 ; line:63 col:12 + %add.i = fadd float %h.i.263, %59, !dbg !67 ; line:63 col:9 + %inc.i = add nsw i32 %j.i.0, 1, !dbg !68 ; line:62 col:28 + %cmp3.i = icmp slt i32 %inc.i, 4, !dbg !69 ; line:62 col:23 + br i1 %cmp3.i, label %for.body.7.i, label %for.cond.cleanup.6.i, !dbg !63 ; line:62 col:5 + +for.body.i.8.lr.ph: ; preds = %for.cond.cleanup.6.i + %add35 = fadd float %add32, %add.i, !dbg !70 ; line:101 col:8 + %60 = call <12 x float> @"dx.hl.op.rn.<12 x float> (i32, %dx.types.HitObject*)"(i32 372, %dx.types.HitObject* %hit), !dbg !71 ; line:102 col:23 + %row2col52 = shufflevector <12 x float> %60, <12 x float> %60, <12 x i32> , !dbg !72 ; line:102 col:11 + br label %for.body.7.i.15.lr.ph, !dbg !73 ; line:61 col:3 + +for.body.7.i.15.lr.ph: ; preds = %for.cond.cleanup.6.i.12, %for.body.i.8.lr.ph + %i.i.3.0 = phi i32 [ 0, %for.body.i.8.lr.ph ], [ %inc9.i.11, %for.cond.cleanup.6.i.12 ] + %h.i.2.0 = phi float [ 0.000000e+00, %for.body.i.8.lr.ph ], [ %add.i.13, %for.cond.cleanup.6.i.12 ] + br label %for.body.7.i.15, !dbg !76 ; line:62 col:5 + +for.cond.cleanup.6.i.12: ; preds = %for.body.7.i.15 + %inc9.i.11 = add nsw i32 %i.i.3.0, 1, !dbg !77 ; line:61 col:26 + %cmp.i.6 = icmp slt i32 %inc9.i.11, 4, !dbg !78 ; line:61 col:21 + br i1 %cmp.i.6, label %for.body.7.i.15.lr.ph, label %for.body.i.23.lr.ph, !dbg !73 ; line:61 col:3 + +for.body.7.i.15: ; preds = %for.body.7.i.15.lr.ph, %for.body.7.i.15 + %j.i.5.0 = phi i32 [ 0, %for.body.7.i.15.lr.ph ], [ %inc.i.14, %for.body.7.i.15 ] + %h.i.2.2 = phi float [ %h.i.2.0, %for.body.7.i.15.lr.ph ], [ %add.i.13, %for.body.7.i.15 ] + %61 = add i32 4, %i.i.3.0, !dbg !79 ; line:63 col:12 + %62 = add i32 8, %i.i.3.0, !dbg !79 ; line:63 col:12 + %63 = getelementptr [3 x i32], [3 x i32]* %5, i32 0, i32 0, !dbg !79 ; line:63 col:12 + store i32 %i.i.3.0, i32* %63, !dbg !79 ; line:63 col:12 + %64 = getelementptr [3 x i32], [3 x i32]* %5, i32 0, i32 1, !dbg !79 ; line:63 col:12 + store i32 %61, i32* %64, !dbg !79 ; line:63 col:12 + %65 = getelementptr [3 x i32], [3 x i32]* %5, i32 0, i32 2, !dbg !79 ; line:63 col:12 + store i32 %62, i32* %65, !dbg !79 ; line:63 col:12 + %66 = getelementptr [3 x i32], [3 x i32]* %5, i32 0, i32 %j.i.5.0, !dbg !79 ; line:63 col:12 + %67 = load i32, i32* %66, !dbg !79 ; line:63 col:12 + %68 = extractelement <12 x float> %row2col52, i64 0, !dbg !79 ; line:63 col:12 + %69 = getelementptr 
[12 x float], [12 x float]* %4, i32 0, i32 0, !dbg !79 ; line:63 col:12 + store float %68, float* %69, !dbg !79 ; line:63 col:12 + %70 = extractelement <12 x float> %row2col52, i64 1, !dbg !79 ; line:63 col:12 + %71 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 1, !dbg !79 ; line:63 col:12 + store float %70, float* %71, !dbg !79 ; line:63 col:12 + %72 = extractelement <12 x float> %row2col52, i64 2, !dbg !79 ; line:63 col:12 + %73 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 2, !dbg !79 ; line:63 col:12 + store float %72, float* %73, !dbg !79 ; line:63 col:12 + %74 = extractelement <12 x float> %row2col52, i64 3, !dbg !79 ; line:63 col:12 + %75 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 3, !dbg !79 ; line:63 col:12 + store float %74, float* %75, !dbg !79 ; line:63 col:12 + %76 = extractelement <12 x float> %row2col52, i64 4, !dbg !79 ; line:63 col:12 + %77 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 4, !dbg !79 ; line:63 col:12 + store float %76, float* %77, !dbg !79 ; line:63 col:12 + %78 = extractelement <12 x float> %row2col52, i64 5, !dbg !79 ; line:63 col:12 + %79 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 5, !dbg !79 ; line:63 col:12 + store float %78, float* %79, !dbg !79 ; line:63 col:12 + %80 = extractelement <12 x float> %row2col52, i64 6, !dbg !79 ; line:63 col:12 + %81 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 6, !dbg !79 ; line:63 col:12 + store float %80, float* %81, !dbg !79 ; line:63 col:12 + %82 = extractelement <12 x float> %row2col52, i64 7, !dbg !79 ; line:63 col:12 + %83 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 7, !dbg !79 ; line:63 col:12 + store float %82, float* %83, !dbg !79 ; line:63 col:12 + %84 = extractelement <12 x float> %row2col52, i64 8, !dbg !79 ; line:63 col:12 + %85 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 8, !dbg !79 ; line:63 col:12 + store float %84, float* %85, !dbg !79 ; line:63 col:12 + %86 = extractelement <12 x float> %row2col52, i64 9, !dbg !79 ; line:63 col:12 + %87 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 9, !dbg !79 ; line:63 col:12 + store float %86, float* %87, !dbg !79 ; line:63 col:12 + %88 = extractelement <12 x float> %row2col52, i64 10, !dbg !79 ; line:63 col:12 + %89 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 10, !dbg !79 ; line:63 col:12 + store float %88, float* %89, !dbg !79 ; line:63 col:12 + %90 = extractelement <12 x float> %row2col52, i64 11, !dbg !79 ; line:63 col:12 + %91 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 11, !dbg !79 ; line:63 col:12 + store float %90, float* %91, !dbg !79 ; line:63 col:12 + %92 = getelementptr [12 x float], [12 x float]* %4, i32 0, i32 %67, !dbg !79 ; line:63 col:12 + %93 = load float, float* %92, !dbg !79 ; line:63 col:12 + %add.i.13 = fadd float %h.i.2.2, %93, !dbg !80 ; line:63 col:9 + %inc.i.14 = add nsw i32 %j.i.5.0, 1, !dbg !81 ; line:62 col:28 + %cmp3.i.9 = icmp slt i32 %inc.i.14, 3, !dbg !82 ; line:62 col:23 + br i1 %cmp3.i.9, label %for.body.7.i.15, label %for.cond.cleanup.6.i.12, !dbg !76 ; line:62 col:5 + +for.body.i.23.lr.ph: ; preds = %for.cond.cleanup.6.i.12 + %add38 = fadd float %add35, %add.i.13, !dbg !83 ; line:102 col:8 + %94 = call <12 x float> @"dx.hl.op.rn.<12 x float> (i32, %dx.types.HitObject*)"(i32 380, %dx.types.HitObject* %hit), !dbg !84 ; line:103 col:23 + %row2col53 = shufflevector <12 x float> %94, <12 x float> %94, <12 x i32> , !dbg !85 ; line:103 col:11 + br label %for.body.7.i.30.lr.ph, !dbg 
!86 ; line:61 col:3 + +for.body.7.i.30.lr.ph: ; preds = %for.cond.cleanup.6.i.27, %for.body.i.23.lr.ph + %i.i.18.0 = phi i32 [ 0, %for.body.i.23.lr.ph ], [ %inc9.i.26, %for.cond.cleanup.6.i.27 ] + %h.i.17.0 = phi float [ 0.000000e+00, %for.body.i.23.lr.ph ], [ %add.i.28, %for.cond.cleanup.6.i.27 ] + br label %for.body.7.i.30, !dbg !88 ; line:62 col:5 + +for.cond.cleanup.6.i.27: ; preds = %for.body.7.i.30 + %inc9.i.26 = add nsw i32 %i.i.18.0, 1, !dbg !89 ; line:61 col:26 + %cmp.i.21 = icmp slt i32 %inc9.i.26, 3, !dbg !90 ; line:61 col:21 + br i1 %cmp.i.21, label %for.body.7.i.30.lr.ph, label %for.body.i.39.lr.ph, !dbg !86 ; line:61 col:3 + +for.body.7.i.30: ; preds = %for.body.7.i.30.lr.ph, %for.body.7.i.30 + %j.i.20.0 = phi i32 [ 0, %for.body.7.i.30.lr.ph ], [ %inc.i.29, %for.body.7.i.30 ] + %h.i.17.2 = phi float [ %h.i.17.0, %for.body.7.i.30.lr.ph ], [ %add.i.28, %for.body.7.i.30 ] + %95 = add i32 3, %i.i.18.0, !dbg !91 ; line:63 col:12 + %96 = add i32 6, %i.i.18.0, !dbg !91 ; line:63 col:12 + %97 = add i32 9, %i.i.18.0, !dbg !91 ; line:63 col:12 + %98 = getelementptr [4 x i32], [4 x i32]* %3, i32 0, i32 0, !dbg !91 ; line:63 col:12 + store i32 %i.i.18.0, i32* %98, !dbg !91 ; line:63 col:12 + %99 = getelementptr [4 x i32], [4 x i32]* %3, i32 0, i32 1, !dbg !91 ; line:63 col:12 + store i32 %95, i32* %99, !dbg !91 ; line:63 col:12 + %100 = getelementptr [4 x i32], [4 x i32]* %3, i32 0, i32 2, !dbg !91 ; line:63 col:12 + store i32 %96, i32* %100, !dbg !91 ; line:63 col:12 + %101 = getelementptr [4 x i32], [4 x i32]* %3, i32 0, i32 3, !dbg !91 ; line:63 col:12 + store i32 %97, i32* %101, !dbg !91 ; line:63 col:12 + %102 = getelementptr [4 x i32], [4 x i32]* %3, i32 0, i32 %j.i.20.0, !dbg !91 ; line:63 col:12 + %103 = load i32, i32* %102, !dbg !91 ; line:63 col:12 + %104 = extractelement <12 x float> %row2col53, i64 0, !dbg !91 ; line:63 col:12 + %105 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 0, !dbg !91 ; line:63 col:12 + store float %104, float* %105, !dbg !91 ; line:63 col:12 + %106 = extractelement <12 x float> %row2col53, i64 1, !dbg !91 ; line:63 col:12 + %107 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 1, !dbg !91 ; line:63 col:12 + store float %106, float* %107, !dbg !91 ; line:63 col:12 + %108 = extractelement <12 x float> %row2col53, i64 2, !dbg !91 ; line:63 col:12 + %109 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 2, !dbg !91 ; line:63 col:12 + store float %108, float* %109, !dbg !91 ; line:63 col:12 + %110 = extractelement <12 x float> %row2col53, i64 3, !dbg !91 ; line:63 col:12 + %111 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 3, !dbg !91 ; line:63 col:12 + store float %110, float* %111, !dbg !91 ; line:63 col:12 + %112 = extractelement <12 x float> %row2col53, i64 4, !dbg !91 ; line:63 col:12 + %113 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 4, !dbg !91 ; line:63 col:12 + store float %112, float* %113, !dbg !91 ; line:63 col:12 + %114 = extractelement <12 x float> %row2col53, i64 5, !dbg !91 ; line:63 col:12 + %115 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 5, !dbg !91 ; line:63 col:12 + store float %114, float* %115, !dbg !91 ; line:63 col:12 + %116 = extractelement <12 x float> %row2col53, i64 6, !dbg !91 ; line:63 col:12 + %117 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 6, !dbg !91 ; line:63 col:12 + store float %116, float* %117, !dbg !91 ; line:63 col:12 + %118 = extractelement <12 x float> %row2col53, i64 7, !dbg !91 ; line:63 col:12 + %119 = getelementptr [12 x 
float], [12 x float]* %2, i32 0, i32 7, !dbg !91 ; line:63 col:12 + store float %118, float* %119, !dbg !91 ; line:63 col:12 + %120 = extractelement <12 x float> %row2col53, i64 8, !dbg !91 ; line:63 col:12 + %121 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 8, !dbg !91 ; line:63 col:12 + store float %120, float* %121, !dbg !91 ; line:63 col:12 + %122 = extractelement <12 x float> %row2col53, i64 9, !dbg !91 ; line:63 col:12 + %123 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 9, !dbg !91 ; line:63 col:12 + store float %122, float* %123, !dbg !91 ; line:63 col:12 + %124 = extractelement <12 x float> %row2col53, i64 10, !dbg !91 ; line:63 col:12 + %125 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 10, !dbg !91 ; line:63 col:12 + store float %124, float* %125, !dbg !91 ; line:63 col:12 + %126 = extractelement <12 x float> %row2col53, i64 11, !dbg !91 ; line:63 col:12 + %127 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 11, !dbg !91 ; line:63 col:12 + store float %126, float* %127, !dbg !91 ; line:63 col:12 + %128 = getelementptr [12 x float], [12 x float]* %2, i32 0, i32 %103, !dbg !91 ; line:63 col:12 + %129 = load float, float* %128, !dbg !91 ; line:63 col:12 + %add.i.28 = fadd float %h.i.17.2, %129, !dbg !92 ; line:63 col:9 + %inc.i.29 = add nsw i32 %j.i.20.0, 1, !dbg !93 ; line:62 col:28 + %cmp3.i.24 = icmp slt i32 %inc.i.29, 4, !dbg !94 ; line:62 col:23 + br i1 %cmp3.i.24, label %for.body.7.i.30, label %for.cond.cleanup.6.i.27, !dbg !88 ; line:62 col:5 + +for.body.i.39.lr.ph: ; preds = %for.cond.cleanup.6.i.27 + %add41 = fadd float %add38, %add.i.28, !dbg !95 ; line:103 col:8 + %130 = call <12 x float> @"dx.hl.op.rn.<12 x float> (i32, %dx.types.HitObject*)"(i32 381, %dx.types.HitObject* %hit), !dbg !96 ; line:104 col:23 + %row2col54 = shufflevector <12 x float> %130, <12 x float> %130, <12 x i32> , !dbg !97 ; line:104 col:11 + br label %for.body.7.i.46.lr.ph, !dbg !98 ; line:61 col:3 + +for.body.7.i.46.lr.ph: ; preds = %for.cond.cleanup.6.i.43, %for.body.i.39.lr.ph + %i.i.34.0 = phi i32 [ 0, %for.body.i.39.lr.ph ], [ %inc9.i.42, %for.cond.cleanup.6.i.43 ] + %h.i.33.0 = phi float [ 0.000000e+00, %for.body.i.39.lr.ph ], [ %add.i.44, %for.cond.cleanup.6.i.43 ] + br label %for.body.7.i.46, !dbg !100 ; line:62 col:5 + +for.cond.cleanup.6.i.43: ; preds = %for.body.7.i.46 + %inc9.i.42 = add nsw i32 %i.i.34.0, 1, !dbg !101 ; line:61 col:26 + %cmp.i.37 = icmp slt i32 %inc9.i.42, 4, !dbg !102 ; line:61 col:21 + br i1 %cmp.i.37, label %for.body.7.i.46.lr.ph, label %"\01??$hashM@$03$02@@YAMV?$matrix@M$03$02@@@Z.exit.47", !dbg !98 ; line:61 col:3 + +for.body.7.i.46: ; preds = %for.body.7.i.46.lr.ph, %for.body.7.i.46 + %j.i.36.0 = phi i32 [ 0, %for.body.7.i.46.lr.ph ], [ %inc.i.45, %for.body.7.i.46 ] + %h.i.33.2 = phi float [ %h.i.33.0, %for.body.7.i.46.lr.ph ], [ %add.i.44, %for.body.7.i.46 ] + %131 = add i32 4, %i.i.34.0, !dbg !103 ; line:63 col:12 + %132 = add i32 8, %i.i.34.0, !dbg !103 ; line:63 col:12 + %133 = getelementptr [3 x i32], [3 x i32]* %1, i32 0, i32 0, !dbg !103 ; line:63 col:12 + store i32 %i.i.34.0, i32* %133, !dbg !103 ; line:63 col:12 + %134 = getelementptr [3 x i32], [3 x i32]* %1, i32 0, i32 1, !dbg !103 ; line:63 col:12 + store i32 %131, i32* %134, !dbg !103 ; line:63 col:12 + %135 = getelementptr [3 x i32], [3 x i32]* %1, i32 0, i32 2, !dbg !103 ; line:63 col:12 + store i32 %132, i32* %135, !dbg !103 ; line:63 col:12 + %136 = getelementptr [3 x i32], [3 x i32]* %1, i32 0, i32 %j.i.36.0, !dbg !103 ; line:63 col:12 + %137 = 
load i32, i32* %136, !dbg !103 ; line:63 col:12 + %138 = extractelement <12 x float> %row2col54, i64 0, !dbg !103 ; line:63 col:12 + %139 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 0, !dbg !103 ; line:63 col:12 + store float %138, float* %139, !dbg !103 ; line:63 col:12 + %140 = extractelement <12 x float> %row2col54, i64 1, !dbg !103 ; line:63 col:12 + %141 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 1, !dbg !103 ; line:63 col:12 + store float %140, float* %141, !dbg !103 ; line:63 col:12 + %142 = extractelement <12 x float> %row2col54, i64 2, !dbg !103 ; line:63 col:12 + %143 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 2, !dbg !103 ; line:63 col:12 + store float %142, float* %143, !dbg !103 ; line:63 col:12 + %144 = extractelement <12 x float> %row2col54, i64 3, !dbg !103 ; line:63 col:12 + %145 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 3, !dbg !103 ; line:63 col:12 + store float %144, float* %145, !dbg !103 ; line:63 col:12 + %146 = extractelement <12 x float> %row2col54, i64 4, !dbg !103 ; line:63 col:12 + %147 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 4, !dbg !103 ; line:63 col:12 + store float %146, float* %147, !dbg !103 ; line:63 col:12 + %148 = extractelement <12 x float> %row2col54, i64 5, !dbg !103 ; line:63 col:12 + %149 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 5, !dbg !103 ; line:63 col:12 + store float %148, float* %149, !dbg !103 ; line:63 col:12 + %150 = extractelement <12 x float> %row2col54, i64 6, !dbg !103 ; line:63 col:12 + %151 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 6, !dbg !103 ; line:63 col:12 + store float %150, float* %151, !dbg !103 ; line:63 col:12 + %152 = extractelement <12 x float> %row2col54, i64 7, !dbg !103 ; line:63 col:12 + %153 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 7, !dbg !103 ; line:63 col:12 + store float %152, float* %153, !dbg !103 ; line:63 col:12 + %154 = extractelement <12 x float> %row2col54, i64 8, !dbg !103 ; line:63 col:12 + %155 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 8, !dbg !103 ; line:63 col:12 + store float %154, float* %155, !dbg !103 ; line:63 col:12 + %156 = extractelement <12 x float> %row2col54, i64 9, !dbg !103 ; line:63 col:12 + %157 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 9, !dbg !103 ; line:63 col:12 + store float %156, float* %157, !dbg !103 ; line:63 col:12 + %158 = extractelement <12 x float> %row2col54, i64 10, !dbg !103 ; line:63 col:12 + %159 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 10, !dbg !103 ; line:63 col:12 + store float %158, float* %159, !dbg !103 ; line:63 col:12 + %160 = extractelement <12 x float> %row2col54, i64 11, !dbg !103 ; line:63 col:12 + %161 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 11, !dbg !103 ; line:63 col:12 + store float %160, float* %161, !dbg !103 ; line:63 col:12 + %162 = getelementptr [12 x float], [12 x float]* %0, i32 0, i32 %137, !dbg !103 ; line:63 col:12 + %163 = load float, float* %162, !dbg !103 ; line:63 col:12 + %add.i.44 = fadd float %h.i.33.2, %163, !dbg !104 ; line:63 col:9 + %inc.i.45 = add nsw i32 %j.i.36.0, 1, !dbg !105 ; line:62 col:28 + %cmp3.i.40 = icmp slt i32 %inc.i.45, 3, !dbg !106 ; line:62 col:23 + br i1 %cmp3.i.40, label %for.body.7.i.46, label %for.cond.cleanup.6.i.43, !dbg !100 ; line:62 col:5 + +"\01??$hashM@$03$02@@YAMV?$matrix@M$03$02@@@Z.exit.47": ; preds = %for.cond.cleanup.6.i.43 + %add44 = fadd float %add41, %add.i.44, !dbg !107 ; line:104 col:8 + %164 
= call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 374, %dx.types.HitObject* %hit), !dbg !108 ; line:107 col:11 + %add46 = add i32 %add21, %164, !dbg !109 ; line:107 col:8 + %165 = call float @"dx.hl.op.rn.float (i32, %dx.types.HitObject*)"(i32 376, %dx.types.HitObject* %hit), !dbg !110 ; line:108 col:11 + %add48 = fadd float %add44, %165, !dbg !111 ; line:108 col:8 + %166 = call float @"dx.hl.op.rn.float (i32, %dx.types.HitObject*)"(i32 375, %dx.types.HitObject* %hit), !dbg !112 ; line:109 col:11 + %add50 = fadd float %add48, %166, !dbg !113 ; line:109 col:8 + %167 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !114 ; line:111 col:3 + %168 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %167), !dbg !114 ; line:111 col:3 + %169 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %168, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !114 ; line:111 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %169, i32 0, float %add50), !dbg !114 ; line:111 col:3 + %170 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !115 ; line:112 col:3 + %171 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %170), !dbg !115 ; line:112 col:3 + %172 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %171, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !115 ; line:112 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32)"(i32 277, %dx.types.Handle %172, i32 4, i32 %add46), !dbg !115 ; line:112 col:3 + %173 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !116 ; line:113 col:1 + call void @llvm.lifetime.end(i64 4, i8* %173) #0, !dbg !116 ; line:113 col:1 + ret void, !dbg !116 ; line:113 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32)"(i32, %dx.types.HitObject*, i32) #0 + +; Function Attrs: nounwind readnone +declare i1 @"dx.hl.op.rn.i1 (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #1 + +; Function Attrs: nounwind readnone +declare i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #1 + +; Function Attrs: nounwind readonly +declare i32 @"dx.hl.op.ro.i32 (i32, %dx.types.HitObject*, i32)"(i32, %dx.types.HitObject*, i32) #2 + +; Function Attrs: nounwind readnone +declare <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #1 + +; Function Attrs: nounwind readnone +declare float @"dx.hl.op.rn.float (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32, 
%dx.types.Handle, i32, float) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32)"(i32, %dx.types.Handle, i32, i32) #0 + +; Function Attrs: nounwind readnone +declare <12 x float> @"dx.hl.op.rn.<12 x float> (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !8} +!dx.entryPoints = !{!12} +!dx.fnprops = !{!16} +!dx.options = !{!17, !18} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4891 (staging/ser_hlslaccessors_patch, 1ca27ee12)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.dx::HitObject" undef, !6} +!6 = !{i32 4, !7} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!8 = !{i32 1, void ()* @"\01?main@@YAXXZ", !9} +!9 = !{!10} +!10 = !{i32 1, !11, !11} +!11 = !{} +!12 = !{null, !"", null, !13, null} +!13 = !{null, !14, null, null} +!14 = !{!15} +!15 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !"outbuf", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!16 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!17 = !{i32 -2147483584} +!18 = !{i32 -1} +!19 = !DILocation(line: 69, column: 3, scope: !20) +!20 = !DISubprogram(name: "main", scope: !21, file: !21, line: 68, type: !22, isLocal: false, isDefinition: true, scopeLine: 68, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!21 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_accessors.hlsl", directory: "") +!22 = !DISubroutineType(types: !11) +!23 = !DILocation(line: 69, column: 17, scope: !20) +!24 = !DILocation(line: 75, column: 3, scope: !20) +!25 = !DILocation(line: 80, column: 11, scope: !20) +!26 = !DILocation(line: 81, column: 11, scope: !20) +!27 = !DILocation(line: 81, column: 8, scope: !20) +!28 = !DILocation(line: 82, column: 11, scope: !20) +!29 = !DILocation(line: 82, column: 8, scope: !20) +!30 = !DILocation(line: 85, column: 11, scope: !20) +!31 = !DILocation(line: 85, column: 8, scope: !20) +!32 = !DILocation(line: 86, column: 11, scope: !20) +!33 = !DILocation(line: 86, column: 8, scope: !20) +!34 = !DILocation(line: 87, column: 11, scope: !20) +!35 = !DILocation(line: 87, column: 8, scope: !20) +!36 = !DILocation(line: 88, column: 11, scope: !20) +!37 = !DILocation(line: 88, column: 8, scope: !20) +!38 = !DILocation(line: 89, column: 11, scope: !20) +!39 = !DILocation(line: 89, column: 8, scope: !20) +!40 = !DILocation(line: 90, column: 11, scope: !20) +!41 = !DILocation(line: 90, column: 8, scope: !20) +!42 = !DILocation(line: 91, column: 11, scope: !20) +!43 = !DILocation(line: 91, column: 8, scope: !20) +!44 = !DILocation(line: 94, column: 11, scope: !20) +!45 = !DILocation(line: 94, column: 8, scope: !20) +!46 = !DILocation(line: 95, 
column: 11, scope: !20) +!47 = !DILocation(line: 95, column: 8, scope: !20) +!48 = !DILocation(line: 96, column: 11, scope: !20) +!49 = !DILocation(line: 96, column: 8, scope: !20) +!50 = !DILocation(line: 97, column: 11, scope: !20) +!51 = !DILocation(line: 97, column: 8, scope: !20) +!52 = !DILocation(line: 98, column: 11, scope: !20) +!53 = !DILocation(line: 98, column: 21, scope: !20) +!54 = !DILocation(line: 98, column: 19, scope: !20) +!55 = !DILocation(line: 98, column: 31, scope: !20) +!56 = !DILocation(line: 98, column: 29, scope: !20) +!57 = !DILocation(line: 98, column: 8, scope: !20) +!58 = !DILocation(line: 101, column: 23, scope: !20) +!59 = !DILocation(line: 101, column: 11, scope: !20) +!60 = !DILocation(line: 61, column: 3, scope: !61, inlinedAt: !62) +!61 = !DISubprogram(name: "hashM<3, 4>", scope: !21, file: !21, line: 59, type: !22, isLocal: false, isDefinition: true, scopeLine: 59, flags: DIFlagPrototyped, isOptimized: false) +!62 = distinct !DILocation(line: 101, column: 11, scope: !20) +!63 = !DILocation(line: 62, column: 5, scope: !61, inlinedAt: !62) +!64 = !DILocation(line: 61, column: 26, scope: !61, inlinedAt: !62) +!65 = !DILocation(line: 61, column: 21, scope: !61, inlinedAt: !62) +!66 = !DILocation(line: 63, column: 12, scope: !61, inlinedAt: !62) +!67 = !DILocation(line: 63, column: 9, scope: !61, inlinedAt: !62) +!68 = !DILocation(line: 62, column: 28, scope: !61, inlinedAt: !62) +!69 = !DILocation(line: 62, column: 23, scope: !61, inlinedAt: !62) +!70 = !DILocation(line: 101, column: 8, scope: !20) +!71 = !DILocation(line: 102, column: 23, scope: !20) +!72 = !DILocation(line: 102, column: 11, scope: !20) +!73 = !DILocation(line: 61, column: 3, scope: !74, inlinedAt: !75) +!74 = !DISubprogram(name: "hashM<4, 3>", scope: !21, file: !21, line: 59, type: !22, isLocal: false, isDefinition: true, scopeLine: 59, flags: DIFlagPrototyped, isOptimized: false) +!75 = distinct !DILocation(line: 102, column: 11, scope: !20) +!76 = !DILocation(line: 62, column: 5, scope: !74, inlinedAt: !75) +!77 = !DILocation(line: 61, column: 26, scope: !74, inlinedAt: !75) +!78 = !DILocation(line: 61, column: 21, scope: !74, inlinedAt: !75) +!79 = !DILocation(line: 63, column: 12, scope: !74, inlinedAt: !75) +!80 = !DILocation(line: 63, column: 9, scope: !74, inlinedAt: !75) +!81 = !DILocation(line: 62, column: 28, scope: !74, inlinedAt: !75) +!82 = !DILocation(line: 62, column: 23, scope: !74, inlinedAt: !75) +!83 = !DILocation(line: 102, column: 8, scope: !20) +!84 = !DILocation(line: 103, column: 23, scope: !20) +!85 = !DILocation(line: 103, column: 11, scope: !20) +!86 = !DILocation(line: 61, column: 3, scope: !61, inlinedAt: !87) +!87 = distinct !DILocation(line: 103, column: 11, scope: !20) +!88 = !DILocation(line: 62, column: 5, scope: !61, inlinedAt: !87) +!89 = !DILocation(line: 61, column: 26, scope: !61, inlinedAt: !87) +!90 = !DILocation(line: 61, column: 21, scope: !61, inlinedAt: !87) +!91 = !DILocation(line: 63, column: 12, scope: !61, inlinedAt: !87) +!92 = !DILocation(line: 63, column: 9, scope: !61, inlinedAt: !87) +!93 = !DILocation(line: 62, column: 28, scope: !61, inlinedAt: !87) +!94 = !DILocation(line: 62, column: 23, scope: !61, inlinedAt: !87) +!95 = !DILocation(line: 103, column: 8, scope: !20) +!96 = !DILocation(line: 104, column: 23, scope: !20) +!97 = !DILocation(line: 104, column: 11, scope: !20) +!98 = !DILocation(line: 61, column: 3, scope: !74, inlinedAt: !99) +!99 = distinct !DILocation(line: 104, column: 11, scope: !20) +!100 = !DILocation(line: 62, 
column: 5, scope: !74, inlinedAt: !99) +!101 = !DILocation(line: 61, column: 26, scope: !74, inlinedAt: !99) +!102 = !DILocation(line: 61, column: 21, scope: !74, inlinedAt: !99) +!103 = !DILocation(line: 63, column: 12, scope: !74, inlinedAt: !99) +!104 = !DILocation(line: 63, column: 9, scope: !74, inlinedAt: !99) +!105 = !DILocation(line: 62, column: 28, scope: !74, inlinedAt: !99) +!106 = !DILocation(line: 62, column: 23, scope: !74, inlinedAt: !99) +!107 = !DILocation(line: 104, column: 8, scope: !20) +!108 = !DILocation(line: 107, column: 11, scope: !20) +!109 = !DILocation(line: 107, column: 8, scope: !20) +!110 = !DILocation(line: 108, column: 11, scope: !20) +!111 = !DILocation(line: 108, column: 8, scope: !20) +!112 = !DILocation(line: 109, column: 11, scope: !20) +!113 = !DILocation(line: 109, column: 8, scope: !20) +!114 = !DILocation(line: 111, column: 3, scope: !20) +!115 = !DILocation(line: 112, column: 3, scope: !20) +!116 = !DILocation(line: 113, column: 1, scope: !20) diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_attributes_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_attributes_dxilgen.ll new file mode 100644 index 0000000000..3488a3df03 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_attributes_dxilgen.ll @@ -0,0 +1,151 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; outbuf UAV byte r/w U0u4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RWByteAddressBuffer = type { i32 } +%dx.types.HitObject = type { i8* } +%struct.CustomAttrs = type { <4 x float>, i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.dx::HitObject" = type { i32 } + +@"\01?outbuf@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4 + +; CHECK: %[[ATTRA:[^ ]+]] = alloca %struct.CustomAttrs, align 4 +; CHECK: call void @dx.op.hitObject_Attributes.struct.CustomAttrs(i32 289, %dx.types.HitObject %{{[^ ]+}}, %struct.CustomAttrs* %[[ATTRA]]) + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %hit = alloca %dx.types.HitObject, align 4 + %attrs = alloca %struct.CustomAttrs, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !21 ; line:29 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !21 ; line:29 col:3 + %1 = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %hit), !dbg !25 ; line:29 col:17 + %2 = bitcast %struct.CustomAttrs* %attrs to i8*, !dbg !26 ; line:30 col:3 + call void @llvm.lifetime.start(i64 20, i8* %2) #0, !dbg !26 ; line:30 col:3 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.CustomAttrs*)"(i32 364, %dx.types.HitObject* %hit, %struct.CustomAttrs* %attrs), !dbg !27 ; line:31 col:3 + %v = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 0, !dbg !28 ; line:32 col:21 + %3 = load <4 x float>, <4 x float>* %v, align 4, !dbg !29 ; line:32 col:15 + %4 = extractelement <4 x float> %3, i32 0, !dbg !29 ; line:32 col:15 + %v1 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 0, !dbg !30 ; line:32 col:33 + %5 = load <4 x 
float>, <4 x float>* %v1, align 4, !dbg !31 ; line:32 col:27 + %6 = extractelement <4 x float> %5, i32 1, !dbg !31 ; line:32 col:27 + %add = fadd float %4, %6, !dbg !32 ; line:32 col:25 + %v2 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 0, !dbg !33 ; line:32 col:45 + %7 = load <4 x float>, <4 x float>* %v2, align 4, !dbg !34 ; line:32 col:39 + %8 = extractelement <4 x float> %7, i32 2, !dbg !34 ; line:32 col:39 + %add3 = fadd float %add, %8, !dbg !35 ; line:32 col:37 + %v4 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 0, !dbg !36 ; line:32 col:57 + %9 = load <4 x float>, <4 x float>* %v4, align 4, !dbg !37 ; line:32 col:51 + %10 = extractelement <4 x float> %9, i32 3, !dbg !37 ; line:32 col:51 + %add5 = fadd float %add3, %10, !dbg !38 ; line:32 col:49 + %y = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 1, !dbg !39 ; line:32 col:69 + %11 = load i32, i32* %y, align 4, !dbg !39, !tbaa !40 ; line:32 col:69 + %conv = sitofp i32 %11 to float, !dbg !44 ; line:32 col:63 + %add6 = fadd float %add5, %conv, !dbg !45 ; line:32 col:61 + %12 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !46 ; line:33 col:3 + %13 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %12), !dbg !46 ; line:33 col:3 + %14 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %13, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !46 ; line:33 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %14, i32 0, float %add6), !dbg !46 ; line:33 col:3 + %15 = bitcast %struct.CustomAttrs* %attrs to i8*, !dbg !47 ; line:34 col:1 + call void @llvm.lifetime.end(i64 20, i8* %15) #0, !dbg !47 ; line:34 col:1 + %16 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !47 ; line:34 col:1 + call void @llvm.lifetime.end(i64 4, i8* %16) #0, !dbg !47 ; line:34 col:1 + ret void, !dbg !47 ; line:34 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.CustomAttrs*)"(i32, %dx.types.HitObject*, %struct.CustomAttrs*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32, %dx.types.Handle, i32, float) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!dx.version = !{!2} +!dx.valver = !{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = 
!{!4, !10} +!dx.entryPoints = !{!14} +!dx.fnprops = !{!18} +!dx.options = !{!19, !20} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{i32 1, i32 9} +!3 = !{!"lib", i32 6, i32 9} +!4 = !{i32 0, %"class.dx::HitObject" undef, !5, %struct.CustomAttrs undef, !7} +!5 = !{i32 4, !6} +!6 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!7 = !{i32 20, !8, !9} +!8 = !{i32 6, !"v", i32 3, i32 0, i32 7, i32 9, i32 13, i32 4} +!9 = !{i32 6, !"y", i32 3, i32 16, i32 7, i32 4} +!10 = !{i32 1, void ()* @"\01?main@@YAXXZ", !11} +!11 = !{!12} +!12 = !{i32 1, !13, !13} +!13 = !{} +!14 = !{null, !"", null, !15, null} +!15 = !{null, !16, null, null} +!16 = !{!17} +!17 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !"outbuf", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!18 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!19 = !{i32 -2147483584} +!20 = !{i32 -1} +!21 = !DILocation(line: 29, column: 3, scope: !22) +!22 = !DISubprogram(name: "main", scope: !23, file: !23, line: 28, type: !24, isLocal: false, isDefinition: true, scopeLine: 28, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!23 = !DIFile(filename: "tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes.hlsl", directory: "") +!24 = !DISubroutineType(types: !13) +!25 = !DILocation(line: 29, column: 17, scope: !22) +!26 = !DILocation(line: 30, column: 3, scope: !22) +!27 = !DILocation(line: 31, column: 3, scope: !22) +!28 = !DILocation(line: 32, column: 21, scope: !22) +!29 = !DILocation(line: 32, column: 15, scope: !22) +!30 = !DILocation(line: 32, column: 33, scope: !22) +!31 = !DILocation(line: 32, column: 27, scope: !22) +!32 = !DILocation(line: 32, column: 25, scope: !22) +!33 = !DILocation(line: 32, column: 45, scope: !22) +!34 = !DILocation(line: 32, column: 39, scope: !22) +!35 = !DILocation(line: 32, column: 37, scope: !22) +!36 = !DILocation(line: 32, column: 57, scope: !22) +!37 = !DILocation(line: 32, column: 51, scope: !22) +!38 = !DILocation(line: 32, column: 49, scope: !22) +!39 = !DILocation(line: 32, column: 69, scope: !22) +!40 = !{!41, !41, i64 0} +!41 = !{!"int", !42, i64 0} +!42 = !{!"omnipotent char", !43, i64 0} +!43 = !{!"Simple C/C++ TBAA"} +!44 = !DILocation(line: 32, column: 63, scope: !22) +!45 = !DILocation(line: 32, column: 61, scope: !22) +!46 = !DILocation(line: 33, column: 3, scope: !22) +!47 = !DILocation(line: 34, column: 1, scope: !22) diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_fromrayquery_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_fromrayquery_dxilgen.ll new file mode 100644 index 0000000000..0ae8e36fa7 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_fromrayquery_dxilgen.ll @@ -0,0 +1,146 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; RTAS texture i32 ras T0t4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%struct.CustomAttrs = type { float, float } +%dx.types.HitObject = type { i8* } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.RWStructuredBuffer" = type { float 
} +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"class.dx::HitObject" = type { i32 } +%"class.RayQuery<5, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 + +; CHECK: %[[ATTRA:[^ ]+]] = alloca %struct.CustomAttrs +; CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQH:[^ ]+]], %dx.types.Handle %{{[^ ]+}}, i32 0, i32 255, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32 263, i32 %[[RQH]]) +; CHECK: %{{[^ ]+}} = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %[[RQH]], i32 16, %struct.CustomAttrs* %[[ATTRA]]) + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %0 = alloca %struct.CustomAttrs + %agg.tmp = alloca %dx.types.HitObject, align 4 + %agg.tmp1 = alloca %dx.types.HitObject, align 4 + %q2 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 5, i32 0), !dbg !38 ; line:29 col:78 + %1 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !42 ; line:31 col:3 + %2 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %1), !dbg !42 ; line:31 col:3 + %3 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !42 ; line:31 col:3 + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %q2, %dx.types.Handle %3, i32 0, i32 255, <3 x float> zeroinitializer, float 0.000000e+00, <3 x float> <float 1.000000e+00, float 0.000000e+00, float 0.000000e+00>, float 9.999000e+03), !dbg !42 ; line:31 col:3 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32)"(i32 363, %dx.types.HitObject* %agg.tmp, i32 %q2), !dbg !43 ; line:33 col:7 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %agg.tmp) #0, !dbg !44 ; line:24 col:3 + %.0 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %0, i32 0, i32 0 + store float 1.000000e+00, float* %.0 + %.1 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %0, i32 0, i32 1 + store float 2.000000e+00, float* %.1, align 4 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %agg.tmp1, i32 %q2, i32 16, %struct.CustomAttrs* %0), !dbg !47 ; line:36 col:7 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %agg.tmp1) #0, !dbg !48 ; line:24 col:3 + ret void, !dbg !49 ; line:37 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; 
Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.CustomAttrs*)"(i32, %dx.types.HitObject*, i32, i32, %struct.CustomAttrs*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32)"(i32, %dx.types.HitObject*, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!dx.version = !{!2} +!dx.valver = !{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = !{!4, !26} +!dx.entryPoints = !{!30} +!dx.fnprops = !{!35} +!dx.options = !{!36, !37} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{i32 1, i32 9} +!3 = !{!"lib", i32 6, i32 9} +!4 = !{i32 0, %"class.RWStructuredBuffer" undef, !5, %struct.RayDesc undef, !10, %"class.dx::HitObject" undef, !15, %"class.RayQuery<5, 0>" undef, !17, %struct.CustomAttrs undef, !23} +!5 = !{i32 4, !6, !7} +!6 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!7 = !{i32 0, !8} +!8 = !{!9} +!9 = !{i32 0, float undef} +!10 = !{i32 32, !11, !12, !13, !14} +!11 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!12 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!13 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!14 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!15 = !{i32 4, !16} +!16 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!17 = !{i32 4, !18, !19} +!18 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!19 = !{i32 0, !20} +!20 = !{!21, !22} +!21 = !{i32 1, i64 5} +!22 = !{i32 1, i64 0} +!23 = !{i32 8, !24, !25} +!24 = !{i32 6, !"x", i32 3, i32 0, i32 7, i32 9} +!25 = !{i32 6, !"y", i32 3, i32 4, i32 7, i32 9} +!26 = !{i32 1, void ()* @"\01?main@@YAXXZ", !27} +!27 = !{!28} +!28 = !{i32 1, !29, !29} +!29 = !{} +!30 = !{null, !"", null, !31, null} +!31 = !{!32, null, null, null} +!32 = !{!33} +!33 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !34} +!34 = !{i32 0, i32 4} +!35 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!36 = !{i32 -2147483584} +!37 = !{i32 -1} +!38 = !DILocation(line: 29, column: 78, scope: !39) +!39 = !DISubprogram(name: "main", scope: !40, file: !40, line: 28, type: !41, isLocal: false, isDefinition: true, scopeLine: 28, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!40 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl", directory: "") +!41 = !DISubroutineType(types: !29) +!42 = !DILocation(line: 31, column: 3, scope: !39) +!43 = !DILocation(line: 33, column: 7, scope: !39) +!44 = !DILocation(line: 24, column: 3, scope: !45, inlinedAt: !46) +!45 = !DISubprogram(name: "Use", scope: !40, file: !40, line: 23, type: !41, isLocal: false, isDefinition: true, scopeLine: 23, flags: DIFlagPrototyped, isOptimized: false) 
+!46 = distinct !DILocation(line: 33, column: 3, scope: !39) +!47 = !DILocation(line: 36, column: 7, scope: !39) +!48 = !DILocation(line: 24, column: 3, scope: !45, inlinedAt: !49) +!49 = distinct !DILocation(line: 36, column: 3, scope: !39) +!50 = !DILocation(line: 37, column: 1, scope: !39) diff --git a/tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll new file mode 100644 index 0000000000..03bb0716ce --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/hitobject_traceinvoke_dxilgen.ll @@ -0,0 +1,124 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%struct.Payload = type { <3 x float> } +%dx.types.HitObject = type { i8* } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.RWStructuredBuffer" = type { float } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"class.dx::HitObject" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %pld_invoke = alloca %struct.Payload + %pld_trace = alloca %struct.Payload + %hit = alloca %dx.types.HitObject, align 4 + %0 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !32 ; line:91 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !32 ; line:91 col:3 + %1 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !36 ; line:91 col:23 + %rtas = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %1), !dbg !36 ; line:91 col:23 + + ; Capture the handle for the RTAS + ; CHECK: %[[RTAS:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }) + %2 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %rtas, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !36 ; line:91 col:23 + + %3 = getelementptr inbounds %struct.Payload, %struct.Payload* %pld_trace, i32 0, i32 0, !dbg !36 ; line:91 col:23 + store <3 x float> , <3 x float>* %3, !dbg !36 ; line:91 col:23 + + ; CHECK: %[[TRACEHO:[^ ]+]] = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %[[RTAS]], i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* %pld_trace), !dbg !3 ; line:91 col:23 + ; CHECK: store %dx.types.HitObject %[[TRACEHO]], %dx.types.HitObject* %hit + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 389, %dx.types.HitObject* %hit, %dx.types.Handle %2, i32 513, i32 1, i32 2, i32 4, i32 0, <3 x float> <float 0.000000e+00, float 1.000000e+00, float 2.000000e+00>, float 3.000000e+00, <3 x float> <float 4.000000e+00, float 5.000000e+00, float 6.000000e+00>, float 7.000000e+00, %struct.Payload* 
%pld_trace), !dbg !36 ; line:91 col:23 + + %4 = getelementptr inbounds %struct.Payload, %struct.Payload* %pld_trace, i32 0, i32 0, !dbg !37 ; line:101 col:3 + %5 = load <3 x float>, <3 x float>* %4, !dbg !37 ; line:101 col:3 + %6 = getelementptr inbounds %struct.Payload, %struct.Payload* %pld_invoke, i32 0, i32 0, !dbg !37 ; line:101 col:3 + store <3 x float> %5, <3 x float>* %6, !dbg !37 ; line:101 col:3 + + ; CHECK: %[[INVOKEHO:[^ ]+]] = load %dx.types.HitObject, %dx.types.HitObject* %hit + ; CHECK: call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %[[INVOKEHO]], %struct.Payload* %pld_invoke) + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %hit, %struct.Payload* %pld_invoke), !dbg !37 ; line:101 col:3 + + %7 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !38 ; line:102 col:1 + call void @llvm.lifetime.end(i64 4, i8* %7) #0, !dbg !38 ; line:102 col:1 + ret void, !dbg !38 ; line:102 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32, %dx.types.HitObject*, %struct.Payload*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !20} +!dx.entryPoints = !{!24} +!dx.fnprops = !{!29} +!dx.options = !{!30, !31} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4928 (ser_hlslattributes_patch, 937c16cc6)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.RWStructuredBuffer" undef, !6, %struct.RayDesc undef, !11, %struct.Payload undef, !16, %"class.dx::HitObject" undef, !18} +!6 = !{i32 4, !7, !8} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!8 = !{i32 0, !9} +!9 = !{!10} +!10 = !{i32 0, float undef} +!11 = !{i32 32, !12, !13, !14, !15} +!12 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!13 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!14 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!15 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!16 = !{i32 12, !17} +!17 = !{i32 6, !"dummy", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!18 = !{i32 4, !19} +!19 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!20 = !{i32 1, void ()* @"\01?main@@YAXXZ", !21} +!21 = !{!22} +!22 = !{i32 1, !23, !23} +!23 = !{} +!24 = !{null, !"", null, !25, null} +!25 = 
!{!26, null, null, null} +!26 = !{!27} +!27 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !28} +!28 = !{i32 0, i32 4} +!29 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!30 = !{i32 -2147483584} +!31 = !{i32 -1} +!32 = !DILocation(line: 91, column: 3, scope: !33) +!33 = !DISubprogram(name: "main", scope: !34, file: !34, line: 81, type: !35, isLocal: false, isDefinition: true, scopeLine: 81, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!34 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl", directory: "") +!35 = !DISubroutineType(types: !23) +!36 = !DILocation(line: 91, column: 23, scope: !33) +!37 = !DILocation(line: 101, column: 3, scope: !33) +!38 = !DILocation(line: 102, column: 1, scope: !33) diff --git a/tools/clang/test/DXC/Passes/DxilGen/linalg-builtins.ll b/tools/clang/test/DXC/Passes/DxilGen/linalg-builtins.ll new file mode 100644 index 0000000000..ea1be46c4c --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/linalg-builtins.ll @@ -0,0 +1,189 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s +; REQUIRES: dxil-1-9 + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.ByteAddressBuffer = type { i32 } +%struct.RWByteAddressBuffer = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?input_vector_buffer@@3UByteAddressBuffer@@A" = external global %struct.ByteAddressBuffer, align 4 +@"\01?opa_input_buffer@@3UByteAddressBuffer@@A" = external global %struct.ByteAddressBuffer, align 4 +@"\01?matrix_buffer@@3UByteAddressBuffer@@A" = external global %struct.ByteAddressBuffer, align 4 +@"\01?bias_buffer@@3UByteAddressBuffer@@A" = external global %struct.ByteAddressBuffer, align 4 +@"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4 +@"\01?output_vector_buffer@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4 + +; Function Attrs: nounwind +define void @cs_main() #0 { +entry: + ;CHECK-DAG: %[[MLD:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A" + ;CHECK-DAG: %[[BLD:[^ ]+]] = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?bias_buffer@@3UByteAddressBuffer@@A" + ;CHECK-DAG: %[[RWMLD0:[^ ]+]] = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A" + %output_vector = alloca <4 x float>, align 4 + %tmp = bitcast <4 x float>* %output_vector to i8*, !dbg !21 ; line:14 col:5 + call void @llvm.lifetime.start(i64 16, i8* %tmp) #0, !dbg !21 ; line:14 col:5 + %tmp1 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?input_vector_buffer@@3UByteAddressBuffer@@A", !dbg !25 ; line:17 col:37 + %tmp2 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp1), !dbg !25 ; line:17 col:37 + %tmp3 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp2, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !25 ; line:17 col:37 + %tmp4 = call <4 x float> @"dx.hl.op.ro.<4 x float> (i32, 
%dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp3, i32 0), !dbg !25 ; line:17 col:37 + %tmp5 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A", !dbg !26 ; line:33 col:5 + %tmp6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp5), !dbg !26 ; line:33 col:5 + %tmp7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp6, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !26 ; line:33 col:5 + + ;CHECK: %[[MCH0:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.ByteAddressBuffer(i32 160, %struct.ByteAddressBuffer %[[MLD]] + ;CHECK: %[[MAH0:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[MCH0]] + ;CHECK: call <4 x float> @dx.op.matVecMul.v4f32.v4f32(i32 305, <4 x float> %{{[^ ]+}}, i1 false, i32 9, %dx.types.Handle %[[MAH0]], i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64, i1 false) + call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32 390, <4 x float>* %output_vector, i1 false, <4 x float> %tmp4, i1 false, i32 9, %dx.types.Handle %tmp7, i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64), !dbg !26 ; line:33 col:5 + + %tmp8 = load <4 x float>, <4 x float>* %output_vector, align 4, !dbg !27, !tbaa !28 ; line:37 col:35 + %tmp9 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?output_vector_buffer@@3URWByteAddressBuffer@@A", !dbg !31 ; line:37 col:5 + %tmp10 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp9), !dbg !31 ; line:37 col:5 + %tmp11 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp10, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !31 ; line:37 col:5 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp11, i32 0, <4 x float> %tmp8), !dbg !31 ; line:37 col:5 + %tmp12 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A", !dbg !32 ; line:49 col:5 + %tmp13 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp12), !dbg !32 ; line:49 col:5 + %tmp14 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp13, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !32 ; line:49 col:5 + %tmp15 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?bias_buffer@@3UByteAddressBuffer@@A", !dbg !32 ; line:49 col:5 + %tmp16 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp15), !dbg !32 ; line:49 col:5 + %tmp17 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp16, %dx.types.ResourceProperties { i32 11, i32 0 }, 
%struct.ByteAddressBuffer zeroinitializer), !dbg !32 ; line:49 col:5 + + ;CHECK: %[[MCH1:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.ByteAddressBuffer(i32 160, %struct.ByteAddressBuffer %[[MLD]] + ;CHECK: %[[MAH1:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[MCH1]] + ;CHECK: %[[BCH1:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.ByteAddressBuffer(i32 160, %struct.ByteAddressBuffer %[[BLD]] + ;CHECK: %[[BAH1:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[BCH1]] + ;CHECK: call <4 x float> @dx.op.matVecMulAdd.v4f32.v4f32(i32 306, <4 x float> %{{[^ ]+}}, i1 false, i32 9, %dx.types.Handle %[[MAH1]], i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64, %dx.types.Handle %[[BAH1]], i32 0, i32 9, i1 false) + call void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32 391, <4 x float>* %output_vector, i1 false, <4 x float> %tmp4, i1 false, i32 9, %dx.types.Handle %tmp14, i32 0, i32 9, i32 4, i32 4, i32 0, i1 false, i32 64, %dx.types.Handle %tmp17, i32 0, i32 9), !dbg !32 ; line:49 col:5 + + %tmp18 = load <4 x float>, <4 x float>* %output_vector, align 4, !dbg !33, !tbaa !28 ; line:54 col:38 + %tmp19 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?output_vector_buffer@@3URWByteAddressBuffer@@A", !dbg !34 ; line:54 col:5 + %tmp20 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp19), !dbg !34 ; line:54 col:5 + %tmp21 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp20, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !34 ; line:54 col:5 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32 277, %dx.types.Handle %tmp21, i32 1024, <4 x float> %tmp18), !dbg !34 ; line:54 col:5 + %tmp22 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?opa_input_buffer@@3UByteAddressBuffer@@A", !dbg !35 ; line:56 col:37 + %tmp23 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp22), !dbg !35 ; line:56 col:37 + %tmp24 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp23, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !35 ; line:56 col:37 + %tmp25 = call <8 x i32> @"dx.hl.op.ro.<8 x i32> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp24, i32 0), !dbg !35 ; line:56 col:37 + %tmp26 = load %struct.ByteAddressBuffer, %struct.ByteAddressBuffer* @"\01?opa_input_buffer@@3UByteAddressBuffer@@A", !dbg !36 ; line:57 col:37 + %tmp27 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32 0, %struct.ByteAddressBuffer %tmp26), !dbg !36 ; line:57 col:37 + %tmp28 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp27, %dx.types.ResourceProperties { i32 11, i32 0 }, %struct.ByteAddressBuffer zeroinitializer), !dbg !36 ; line:57 col:37 + %tmp29 = call <8 x i32> 
@"dx.hl.op.ro.<8 x i32> (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %tmp28, i32 128), !dbg !36 ; line:57 col:37 + %tmp30 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A", !dbg !37 ; line:67 col:5 + %tmp31 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp30), !dbg !37 ; line:67 col:5 + %tmp32 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp31, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !37 ; line:67 col:5 + + ;CHECK: %[[RWMCH0:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer %[[RWMLD0]] + ;CHECK: %[[RWMAH0:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RWMCH0]] + ;CHECK: call void @dx.op.outerProductAccumulate.v8i32.v8i32(i32 307, <8 x i32> %{{[^ ]+}}, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[RWMAH0]], i32 0, i32 5, i32 3, i32 0) + call void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32 392, <8 x i32> %tmp25, <8 x i32> %tmp29, %dx.types.Handle %tmp32, i32 0, i32 5, i32 3, i32 0), !dbg !37 ; line:67 col:5 + + + %tmp33 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A", !dbg !38 ; line:77 col:5 + %tmp34 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %tmp33), !dbg !38 ; line:77 col:5 + %tmp35 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %tmp34, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer zeroinitializer), !dbg !38 ; line:77 col:5 + + ;CHECK: %[[RWMCH1:[^ ]+]] = call %dx.types.Handle @dx.op.createHandleForLib.struct.RWByteAddressBuffer(i32 160, %struct.RWByteAddressBuffer %[[RWMLD0]] + ;CHECK: %[[RWMAH1:[^ ]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %[[RWMCH1]] + ;CHECK: call void @dx.op.vectorAccumulate.v8i32(i32 308, <8 x i32> %{{[^ ]+}}, %dx.types.Handle %[[RWMAH1]], i32 0) + call void @"dx.hl.op..void (i32, <8 x i32>, %dx.types.Handle, i32)"(i32 393, <8 x i32> %tmp25, %dx.types.Handle %tmp35, i32 0), !dbg !38 ; line:77 col:5 + + %tmp36 = bitcast <4 x float>* %output_vector to i8*, !dbg !39 ; line:79 col:1 + call void @llvm.lifetime.end(i64 16, i8* %tmp36) #0, !dbg !39 ; line:79 col:1 + ret void, !dbg !39 ; line:79 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind readonly +declare <4 x float> @"dx.hl.op.ro.<4 x float> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.ByteAddressBuffer)"(i32, %struct.ByteAddressBuffer) #2 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.ByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, 
%struct.ByteAddressBuffer) #2 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32)"(i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, <4 x float>)"(i32, %dx.types.Handle, i32, <4 x float>) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #2 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #2 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32)"(i32, <4 x float>*, i1, <4 x float>, i1, i32, %dx.types.Handle, i32, i32, i32, i32, i32, i1, i32, %dx.types.Handle, i32, i32) #0 + +; Function Attrs: nounwind readonly +declare <8 x i32> @"dx.hl.op.ro.<8 x i32> (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32)"(i32, <8 x i32>, <8 x i32>, %dx.types.Handle, i32, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, <8 x i32>, %dx.types.Handle, i32)"(i32, <8 x i32>, %dx.types.Handle, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!dx.version = !{!2} +!dx.valver = !{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = !{!4} +!dx.entryPoints = !{!8} +!dx.fnprops = !{!18} +!dx.options = !{!19, !20} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{i32 1, i32 9} +!3 = !{!"cs", i32 6, i32 9} +!4 = !{i32 1, void ()* @cs_main, !5} +!5 = !{!6} +!6 = !{i32 1, !7, !7} +!7 = !{} +!8 = !{void ()* @cs_main, !"cs_main", null, !9, null} +!9 = !{!10, !15, null, null} +!10 = !{!11, !12, !13, !14} +!11 = !{i32 0, %struct.ByteAddressBuffer* @"\01?input_vector_buffer@@3UByteAddressBuffer@@A", !"input_vector_buffer", i32 -1, i32 -1, i32 1, i32 11, i32 0, null} +!12 = !{i32 1, %struct.ByteAddressBuffer* @"\01?opa_input_buffer@@3UByteAddressBuffer@@A", !"opa_input_buffer", i32 -1, i32 -1, i32 1, i32 11, i32 0, null} +!13 = !{i32 2, %struct.ByteAddressBuffer* @"\01?matrix_buffer@@3UByteAddressBuffer@@A", !"matrix_buffer", i32 -1, i32 -1, i32 1, i32 11, i32 0, null} +!14 = !{i32 3, %struct.ByteAddressBuffer* @"\01?bias_buffer@@3UByteAddressBuffer@@A", !"bias_buffer", i32 -1, i32 -1, i32 1, i32 11, i32 0, null} +!15 = !{!16, !17} +!16 = !{i32 0, %struct.RWByteAddressBuffer* @"\01?rw_matrix_buffer@@3URWByteAddressBuffer@@A", !"rw_matrix_buffer", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!17 = !{i32 1, %struct.RWByteAddressBuffer* @"\01?output_vector_buffer@@3URWByteAddressBuffer@@A", !"output_vector_buffer", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!18 = !{void ()* @cs_main, i32 5, i32 1, i32 1, i32 1} +!19 = !{i32 -2147483584} +!20 = !{i32 -1} +!21 = !DILocation(line: 14, column: 5, scope: !22) +!22 = !DISubprogram(name: 
"cs_main", scope: !23, file: !23, line: 12, type: !24, isLocal: false, isDefinition: true, scopeLine: 13, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @cs_main) +!23 = !DIFile(filename: "DirectXShaderCompiler\5Ctools\5Cclang\5Ctest\5CCodeGenDXIL\5Chlsl\5Cintrinsics\5Clinalg_builtins\5Clinalg-builtins.hlsl", directory: "") +!24 = !DISubroutineType(types: !7) +!25 = !DILocation(line: 17, column: 37, scope: !22) +!26 = !DILocation(line: 33, column: 5, scope: !22) +!27 = !DILocation(line: 37, column: 35, scope: !22) +!28 = !{!29, !29, i64 0} +!29 = !{!"omnipotent char", !30, i64 0} +!30 = !{!"Simple C/C++ TBAA"} +!31 = !DILocation(line: 37, column: 5, scope: !22) +!32 = !DILocation(line: 49, column: 5, scope: !22) +!33 = !DILocation(line: 54, column: 38, scope: !22) +!34 = !DILocation(line: 54, column: 5, scope: !22) +!35 = !DILocation(line: 56, column: 37, scope: !22) +!36 = !DILocation(line: 57, column: 37, scope: !22) +!37 = !DILocation(line: 67, column: 5, scope: !22) +!38 = !DILocation(line: 77, column: 5, scope: !22) +!39 = !DILocation(line: 79, column: 1, scope: !22) diff --git a/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll new file mode 100644 index 0000000000..b969a63f12 --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_cb_raydesc_dxilgen.ll @@ -0,0 +1,160 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%"$Globals" = type { %struct.RayDesc } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.RayQuery<513, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"$Globals" = external constant %"$Globals" + +; Function Attrs: nounwind +define void @main() #0 { +entry: + + ; Capture CB, RTAS, and RayQuery + ; CHECK-DAG: %[[CB:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %"$Globals", %dx.types.ResourceProperties { i32 13, i32 32 }) + ; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }) + ; CHECK-DAG: %[[RQ:[^ ,]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513) + + %0 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0) + %1 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) + %2 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %1, i32 0) + %3 = getelementptr inbounds %"$Globals", %"$Globals"* %2, i32 0, i32 0 + %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !34 ; line:12 col:71 + %4 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !38 ; line:13 col:3 + 
%5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !38 ; line:13 col:3 + %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !38 ; line:13 col:3 + + ; Load RayDesc.Origin + ; CHECK: %[[ORIG_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 0) + ; CHECK: %[[ORIG_EX0:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 0 + ; CHECK: %[[ORIG_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[ORIG_EX0]], i64 0 + ; CHECK: %[[ORIG_EX1:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 1 + ; CHECK: %[[ORIG_VXY:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VX]], float %[[ORIG_EX1]], i64 1 + ; CHECK: %[[ORIG_EX2:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[ORIG_CB_LD]], 2 + ; CHECK: %[[ORIG_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VXY]], float %[[ORIG_EX2]], i64 2 + + ; Load RayDesc.TMin + ; CHECK: %[[TMIN_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 0) + ; CHECK: %[[TMIN:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[TMIN_CB_LD]], 3 + + ; Load RayDesc.Direction + ; CHECK: %[[DIR_CB_LD:[^ ,]+]] = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 1) + ; CHECK: %[[DIR_EX0:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[DIR_CB_LD]], 0 + ; CHECK: %[[DIR_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[DIR_EX0]], i64 0 + ; CHECK: %[[DIR_EX1:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[DIR_CB_LD]], 1 + ; CHECK: %[[DIR_VXY:[^ ,]+]] = insertelement <3 x float> %[[DIR_VX]], float %[[DIR_EX1]], i64 1 + ; CHECK: %[[DIR_EX2:[^ ,]+]] = extractvalue %dx.types.CBufRet.f32 %[[DIR_CB_LD]], 2 + ; CHECK: %[[DIR_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[DIR_VXY]], float %[[DIR_EX2]], i64 2 + + ; Load RayDesc.TMax + ; CHECK: %21 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 59, %dx.types.Handle %[[CB]], i32 1) + ; CHECK: %22 = extractvalue %dx.types.CBufRet.f32 %21, 3 + + ; Extract RayDesc vector fields + ; CHECK: %[[ORIGX:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 0 + ; CHECK: %[[ORIGY:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 1 + ; CHECK: %[[ORIGZ:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 2 + ; CHECK: %[[DIRX:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 0 + ; CHECK: %[[DIRY:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 1 + ; CHECK: %[[DIRZ:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 2 + + %7 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 0, !dbg !38 ; line:13 col:3 + %8 = load <3 x float>, <3 x float>* %7, !dbg !38 ; line:13 col:3 + %9 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 1, !dbg !38 ; line:13 col:3 + %10 = load float, float* %9, !dbg !38 ; line:13 col:3 + %11 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 2, !dbg !38 ; line:13 col:3 + %12 = load <3 x float>, <3 x float>* %11, !dbg !38 ; line:13 col:3 + %13 = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %3, i32 0, i32 3, !dbg !38 ; line:13 col:3 + %14 = load float, float* %13, !dbg 
!38 ; line:13 col:3 + + ; Call TraceRayInline + ; CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, float %[[ORIGX]], float %[[ORIGY]], float %[[ORIGZ]], float %[[TMIN]], float %[[DIRX]], float %[[DIRY]], float %[[DIRZ]], float %22) + + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery1, %dx.types.Handle %6, i32 1, i32 2, <3 x float> %8, float %10, <3 x float> %12, float %14), !dbg !38 ; line:13 col:3 + ret void, !dbg !39 ; line:14 col:1 +} + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32, %"$Globals"*, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"$Globals") #1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !20} +!dx.entryPoints = !{!24} +!dx.fnprops = !{!31} +!dx.options = !{!32, !33} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14861 (main, 33bc44a3d)"} +!3 = !{i32 1, i32 5} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 5} +!6 = !{i32 0, %struct.RayDesc undef, !7, %"class.RayQuery<513, 0>" undef, !12, %"$Globals" undef, !18} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 4, !13, !14} +!13 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!14 = !{i32 0, !15} +!15 = !{!16, !17} +!16 = !{i32 1, i64 513} +!17 = !{i32 1, i64 0} +!18 = !{i32 32, !19} +!19 = !{i32 6, !"rayDesc", i32 3, i32 0} +!20 = !{i32 1, void ()* @main, !21} +!21 = !{!22} +!22 = !{i32 1, !23, !23} +!23 = !{} +!24 = !{void ()* @main, !"main", null, !25, null} +!25 = !{!26, null, !29, null} +!26 = !{!27} +!27 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !28} +!28 = !{i32 0, i32 4} +!29 = !{!30} +!30 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, 
i32 32, null} +!31 = !{void ()* @main, i32 1} +!32 = !{i32 64} +!33 = !{i32 -1} +!34 = !DILocation(line: 12, column: 71, scope: !35) +!35 = !DISubprogram(name: "main", scope: !36, file: !36, line: 11, type: !37, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @main) +!36 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl", directory: "") +!37 = !DISubroutineType(types: !23) +!38 = !DILocation(line: 13, column: 3, scope: !35) +!39 = !DILocation(line: 14, column: 1, scope: !35) diff --git a/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll new file mode 100644 index 0000000000..0d97d8782d --- /dev/null +++ b/tools/clang/test/DXC/Passes/DxilGen/tracerayinline_dxilgen.ll @@ -0,0 +1,134 @@ +; RUN: %dxopt %s -hlsl-passes-resume -dxilgen -S | FileCheck %s + +; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl, +; with call to DoTrace commented out. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"class.RayQuery<513, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #0 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #1 + +; Function Attrs: nounwind +define void @main(float* noalias, <3 x float>, float, <3 x float>, float) #1 { +entry: + + ; Load RayDesc fields from input + ; CHECK-DAG: %[[ORIGX_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef) + ; CHECK-DAG: %[[ORIGY_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef) + ; CHECK-DAG: %[[ORIGZ_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef) + ; CHECK-DAG: %[[TMIN:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef) + ; CHECK-DAG: %[[DIRX_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 undef) + ; CHECK-DAG: %[[DIRY_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 undef) + ; CHECK-DAG: %[[DIRZ_LI:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 undef) + ; CHECK-DAG: %[[TMAX:[^ ,]+]] = call float @dx.op.loadInput.f32(i32 4, i32 3, i32 0, i8 0, i32 undef) + ; CHECK-DAG: %[[ORIG_VX:[^ ,]+]] = insertelement <3 x float> undef, float %[[ORIGX_LI]], i64 0 + ; CHECK-DAG: %[[ORIG_VXY:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VX]], float %[[ORIGY_LI]], i64 1 + ; CHECK-DAG: %[[ORIG_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[ORIG_VXY]], float %[[ORIGZ_LI]], i64 2 + ; CHECK-DAG: %[[DIR_VX:[^ ,]+]] = insertelement <3 x 
float> undef, float %[[DIRX_LI]], i64 0 + ; CHECK-DAG: %[[DIR_VXY:[^ ,]+]] = insertelement <3 x float> %[[DIR_VX]], float %[[DIRY_LI]], i64 1 + ; CHECK-DAG: %[[DIR_VXYZ:[^ ,]+]] = insertelement <3 x float> %[[DIR_VXY]], float %[[DIRZ_LI]], i64 2 + + ; Capture RayQuery and RTAS + ; CHECK-DAG: %[[RQ:[^ ,]+]] = call i32 @dx.op.allocateRayQuery(i32 178, i32 513) + ; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }) + + %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !41 ; line:15 col:71 + %5 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !45 ; line:17 col:3 + %6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %5), !dbg !45 ; line:17 col:3 + %7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %6, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure zeroinitializer), !dbg !45 ; line:17 col:3 + + ; Extract RayDesc vector fields + ; CHECK-DAG: %[[ORIGX:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 0 + ; CHECK-DAG: %[[ORIGY:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 1 + ; CHECK-DAG: %[[ORIGZ:[^ ,]+]] = extractelement <3 x float> %[[ORIG_VXYZ]], i64 2 + ; CHECK-DAG: %[[DIRX:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 0 + ; CHECK-DAG: %[[DIRY:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 1 + ; CHECK-DAG: %[[DIRZ:[^ ,]+]] = extractelement <3 x float> %[[DIR_VXYZ]], i64 2 + + ; Call TraceRayInline + ; CHECK: call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, float %[[ORIGX]], float %[[ORIGY]], float %[[ORIGZ]], float %[[TMIN]], float %[[DIRX]], float %[[DIRY]], float %[[DIRZ]], float %[[TMAX]]) + + call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %rayQuery1, %dx.types.Handle %7, i32 1, i32 2, <3 x float> %1, float %2, <3 x float> %3, float %4), !dbg !45 ; line:17 col:3 + store float 0.000000e+00, float* %0, !dbg !46 ; line:18 col:3 + ret void, !dbg !46 ; line:18 col:3 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float) #1 + +attributes #0 = { nounwind readnone } +attributes #1 = { nounwind } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !18} +!dx.entryPoints = !{!33} +!dx.fnprops = !{!38} +!dx.options = !{!39, !40} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14861 (main, 33bc44a3d)"} +!3 = !{i32 1, i32 5} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 5} +!6 = !{i32 0, %struct.RayDesc undef, !7, %"class.RayQuery<513, 0>" undef, !12} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", 
i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 4, !13, !14} +!13 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!14 = !{i32 0, !15} +!15 = !{!16, !17} +!16 = !{i32 1, i64 513} +!17 = !{i32 1, i64 0} +!18 = !{i32 1, void (float*, <3 x float>, float, <3 x float>, float)* @main, !19} +!19 = !{!20, !22, !25, !27, !29, !31} +!20 = !{i32 0, !21, !21} +!21 = !{} +!22 = !{i32 1, !23, !24} +!23 = !{i32 4, !"OUT", i32 7, i32 9} +!24 = !{i32 0} +!25 = !{i32 0, !26, !24} +!26 = !{i32 4, !"RAYDESC", i32 7, i32 9} +!27 = !{i32 0, !26, !28} +!28 = !{i32 1} +!29 = !{i32 0, !26, !30} +!30 = !{i32 2} +!31 = !{i32 0, !26, !32} +!32 = !{i32 3} +!33 = !{void (float*, <3 x float>, float, <3 x float>, float)* @main, !"main", null, !34, null} +!34 = !{!35, null, null, null} +!35 = !{!36} +!36 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !37} +!37 = !{i32 0, i32 4} +!38 = !{void (float*, <3 x float>, float, <3 x float>, float)* @main, i32 1} +!39 = !{i32 64} +!40 = !{i32 -1} +!41 = !DILocation(line: 15, column: 71, scope: !42) +!42 = !DISubprogram(name: "main", scope: !43, file: !43, line: 14, type: !44, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: false, function: void (float*, <3 x float>, float, <3 x float>, float)* @main) +!43 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl", directory: "") +!44 = !DISubroutineType(types: !21) +!45 = !DILocation(line: 17, column: 3, scope: !42) +!46 = !DILocation(line: 18, column: 3, scope: !42) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll new file mode 100644 index 0000000000..85c3a34eb9 --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_fromrayquery_scalarrepl.ll @@ -0,0 +1,383 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; COM: Original HLSL code +; COM: RaytracingAccelerationStructure RTAS; +; COM: RWStructuredBuffer UAV : register(u0); +; COM: RWByteAddressBuffer inbuf; +; COM: RWByteAddressBuffer outbuf; +; COM: +; COM: RayDesc MakeRayDesc() { +; COM: RayDesc desc; +; COM: desc.Origin = float3(0, 0, 0); +; COM: desc.Direction = float3(1, 0, 0); +; COM: desc.TMin = 0.0f; +; COM: desc.TMax = 9999.0; +; COM: return desc; +; COM: } +; COM: +; COM: struct CustomAttrs { +; COM: float x; +; COM: float y; +; COM: }; +; COM: +; COM: void Use(in dx::HitObject hit) { +; COM: dx::MaybeReorderThread(hit); +; COM: } +; COM: +; COM: [shader("raygeneration")] +; COM: void main() { +; COM: RayQuery q; +; COM: RayDesc ray = MakeRayDesc(); +; COM: q.TraceRayInline(RTAS, RAY_FLAG_NONE, 0xFF, ray); +; COM: +; COM: Use(dx::HitObject::FromRayQuery(q)); +; COM: +; COM: CustomAttrs attrs; +; COM: attrs.x = inbuf.Load(0); +; COM: attrs.y = inbuf.Load(4); +; COM: Use(dx::HitObject::FromRayQuery(q, 16, attrs)); +; COM: +; COM: attrs.x = inbuf.Load(8); +; COM: attrs.y = inbuf.Load(12); +; COM: Use(dx::HitObject::FromRayQuery(q, 17, attrs)); +; COM: +; COM: outbuf.Store(0, attrs.x); +; COM: outbuf.Store(4, attrs.y); +; COM: } + +; +; Buffer Definitions: +; +; cbuffer $Globals +; { +; +; [0 x i8] (type annotation not present) +; +; } +; +; Resource bind info for UAV +; { +; +; float $Element; ; Offset: 0 Size: 4 +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ 
---------- ------- ----------- ------- -------------- ------ +; $Globals cbuffer NA NA CB0 cb4294967295 1 +; RTAS texture i32 ras T0t4294967295,space4294967295 1 +; UAV UAV struct r/w U0 u0 1 +; inbuf UAV byte r/w U1u4294967295,space4294967295 1 +; outbuf UAV byte r/w U2u4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%"class.RWStructuredBuffer" = type { float } +%struct.RWByteAddressBuffer = type { i32 } +%ConstantBuffer = type opaque +%"class.RayQuery<5, 0>" = type { i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%dx.types.HitObject = type { i8* } +%struct.CustomAttrs = type { float, float } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.dx::HitObject" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"\01?UAV@@3V?$RWStructuredBuffer@M@@A" = external global %"class.RWStructuredBuffer", align 4 +@"\01?inbuf@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4 +@"\01?outbuf@@3URWByteAddressBuffer@@A" = external global %struct.RWByteAddressBuffer, align 4 +@"$Globals" = external constant %ConstantBuffer + +; CHECK: %[[RQA:[^ ]+]] = alloca i32 +; CHECK: %[[XATTRA:[^ ]+]] = alloca float +; CHECK: %[[YATTRA:[^ ]+]] = alloca float +; CHECK: %[[ATTRA0:[^ ]+]] = alloca %struct.CustomAttrs +; CHECK: %[[ATTRA1:[^ ]+]] = alloca %struct.CustomAttrs + +; COM: Check same query handle used for TraceRayInline and the FromRayQuery calls +; CHECK: %[[RQH:[^ ]+]] = load i32, i32* %[[RQA]] +; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQH]], + +; COM: Check RQ handle loaded for first FromRayQuery call +; CHECK: %[[RQH0:[^ ]+]] = load i32, i32* %[[RQA]] +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32)"(i32 363, %dx.types.HitObject* %{{[^ ]+}}, i32 %[[RQH0]]) + +; COM: Check buffer loads for first FromRayQuery-with-attrs call +; CHECK: %[[XI0:[^ ]+]] = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %{{[^ ]+}}, i32 0) +; CHECK: %[[XF0:[^ ]+]] = uitofp i32 %[[XI0]] to float +; CHECK: store float %[[XF0]], float* %[[XATTRA]], align 4 +; CHECK: %[[YI0:[^ ]+]] = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %{{[^ ]+}}, i32 4) +; CHECK: %[[YF0:[^ ]+]] = uitofp i32 %[[YI0]] to float +; CHECK: store float %[[YF0]], float* %[[YATTRA]], align 4 + +; COM: Check that values from buffer flow into first FromRayQuery-with-attrs call +; CHECK: %[[XPTR0:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[ATTRA0]], i32 0, i32 0 +; CHECK: %[[XF1:[^ ]+]] = load float, float* %[[XATTRA]] +; CHECK: store float %[[XF1]], float* %[[XPTR0]] +; CHECK: %[[YPTR0:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[ATTRA0]], i32 0, i32 1 +; CHECK: %[[YF1:[^ ]+]] = load float, float* %[[YATTRA]] +; CHECK: store float %[[YF1]], float* %[[YPTR0]] +; CHECK: %[[RQH1:[^ ]+]] = load i32, i32* %[[RQA]] +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %{{[^ ]+}}, i32 %[[RQH1]], i32 16, %struct.CustomAttrs* %[[ATTRA0]]) + +; COM: Check buffer loads for second FromRayQuery-with-attrs 
call +; CHECK: %[[XI1:[^ ]+]] = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %{{[^ ]+}}, i32 8) +; CHECK: %[[XF1:[^ ]+]] = uitofp i32 %[[XI1]] to float +; CHECK: store float %[[XF1]], float* %[[XATTRA]], align 4 +; CHECK: %[[YI1:[^ ]+]] = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %{{[^ ]+}}, i32 12) +; CHECK: %[[YF1:[^ ]+]] = uitofp i32 %[[YI1]] to float +; CHECK: store float %[[YF1]], float* %[[YATTRA]], align 4 + +; COM: Check that values from buffer flow into second FromRayQuery-with-attrs call +; CHECK: %[[XPTR1:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[ATTRA1]], i32 0, i32 0 +; CHECK: %[[XF2:[^ ]+]] = load float, float* %[[XATTRA]] +; CHECK: store float %[[XF2]], float* %[[XPTR1]] +; CHECK: %[[YPTR1:[^ ]+]] = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %[[ATTRA1]], i32 0, i32 1 +; CHECK: %[[YF2:[^ ]+]] = load float, float* %[[YATTRA]] +; CHECK: store float %[[YF2]], float* %[[YPTR1]] +; CHECK: %[[RQH2:[^ ]+]] = load i32, i32* %[[RQA]] +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %{{[^ ]+}}, i32 %[[RQH2]], i32 17, %struct.CustomAttrs* %[[ATTRA1]]) + + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %q = alloca %"class.RayQuery<5, 0>", align 4 + %ray = alloca %struct.RayDesc, align 4 + %agg.tmp = alloca %dx.types.HitObject, align 4 + %attrs = alloca %struct.CustomAttrs, align 4 + %agg.tmp4 = alloca %dx.types.HitObject, align 4 + %agg.tmp11 = alloca %dx.types.HitObject, align 4 + %0 = bitcast %"class.RayQuery<5, 0>"* %q to i8*, !dbg !45 ; line:26 col:3 + call void @llvm.lifetime.start(i64 4, i8* %0) #0, !dbg !45 ; line:26 col:3 + %q14 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 5, i32 0), !dbg !49 ; line:26 col:78 + %1 = getelementptr inbounds %"class.RayQuery<5, 0>", %"class.RayQuery<5, 0>"* %q, i32 0, i32 0, !dbg !49 ; line:26 col:78 + store i32 %q14, i32* %1, !dbg !49 ; line:26 col:78 + %2 = bitcast %struct.RayDesc* %ray to i8*, !dbg !50 ; line:27 col:3 + call void @llvm.lifetime.start(i64 32, i8* %2) #0, !dbg !50 ; line:27 col:3 + %Origin.i = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 0, !dbg !51 ; line:8 col:8 + store <3 x float> zeroinitializer, <3 x float>* %Origin.i, align 4, !dbg !54, !tbaa !55, !alias.scope !58 ; line:8 col:15 + %Direction.i = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 2, !dbg !61 ; line:9 col:8 + store <3 x float> , <3 x float>* %Direction.i, align 4, !dbg !62, !tbaa !55, !alias.scope !58 ; line:9 col:18 + %TMin.i = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 1, !dbg !63 ; line:10 col:8 + store float 0.000000e+00, float* %TMin.i, align 4, !dbg !64, !tbaa !65, !alias.scope !58 ; line:10 col:13 + %TMax.i = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %ray, i32 0, i32 3, !dbg !67 ; line:11 col:8 + store float 9.999000e+03, float* %TMax.i, align 4, !dbg !68, !tbaa !65, !alias.scope !58 ; line:11 col:13 + %3 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !69 ; line:28 col:3 + %4 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %3), !dbg !69 ; line:28 col:3 + %5 = call %dx.types.Handle 
@"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !69 ; line:28 col:3 + call void @"dx.hl.op..void (i32, %\22class.RayQuery<5, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32 325, %"class.RayQuery<5, 0>"* %q, %dx.types.Handle %5, i32 0, i32 255, %struct.RayDesc* %ray), !dbg !69 ; line:28 col:3 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %\22class.RayQuery<5, 0>\22*)"(i32 363, %dx.types.HitObject* %agg.tmp, %"class.RayQuery<5, 0>"* %q), !dbg !70 ; line:30 col:7 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %agg.tmp) #0, !dbg !71 ; line:21 col:3 + %6 = bitcast %struct.CustomAttrs* %attrs to i8*, !dbg !74 ; line:32 col:3 + call void @llvm.lifetime.start(i64 8, i8* %6) #0, !dbg !74 ; line:32 col:3 + %7 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?inbuf@@3URWByteAddressBuffer@@A", !dbg !75 ; line:33 col:13 + %8 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %7), !dbg !75 ; line:33 col:13 + %9 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef), !dbg !75 ; line:33 col:13 + %10 = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %9, i32 0), !dbg !75 ; line:33 col:13 + %conv = uitofp i32 %10 to float, !dbg !75 ; line:33 col:13 + %x = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 0, !dbg !76 ; line:33 col:9 + store float %conv, float* %x, align 4, !dbg !77, !tbaa !65 ; line:33 col:11 + %11 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?inbuf@@3URWByteAddressBuffer@@A", !dbg !78 ; line:34 col:13 + %12 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %11), !dbg !78 ; line:34 col:13 + %13 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %12, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef), !dbg !78 ; line:34 col:13 + %14 = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %13, i32 4), !dbg !78 ; line:34 col:13 + %conv3 = uitofp i32 %14 to float, !dbg !78 ; line:34 col:13 + %y = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 1, !dbg !79 ; line:34 col:9 + store float %conv3, float* %y, align 4, !dbg !80, !tbaa !65 ; line:34 col:11 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %\22class.RayQuery<5, 0>\22*, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %agg.tmp4, %"class.RayQuery<5, 0>"* %q, i32 16, %struct.CustomAttrs* %attrs), !dbg !81 ; line:35 col:7 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %agg.tmp4) #0, !dbg !82 ; line:21 col:3 + %15 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?inbuf@@3URWByteAddressBuffer@@A", !dbg !84 ; line:37 col:13 + %16 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle 
(i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %15), !dbg !84 ; line:37 col:13 + %17 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %16, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef), !dbg !84 ; line:37 col:13 + %18 = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %17, i32 8), !dbg !84 ; line:37 col:13 + %conv6 = uitofp i32 %18 to float, !dbg !84 ; line:37 col:13 + %x7 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 0, !dbg !85 ; line:37 col:9 + store float %conv6, float* %x7, align 4, !dbg !86, !tbaa !65 ; line:37 col:11 + %19 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?inbuf@@3URWByteAddressBuffer@@A", !dbg !87 ; line:38 col:13 + %20 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %19), !dbg !87 ; line:38 col:13 + %21 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %20, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef), !dbg !87 ; line:38 col:13 + %22 = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32 231, %dx.types.Handle %21, i32 12), !dbg !87 ; line:38 col:13 + %conv9 = uitofp i32 %22 to float, !dbg !87 ; line:38 col:13 + %y10 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 1, !dbg !88 ; line:38 col:9 + store float %conv9, float* %y10, align 4, !dbg !89, !tbaa !65 ; line:38 col:11 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %\22class.RayQuery<5, 0>\22*, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %agg.tmp11, %"class.RayQuery<5, 0>"* %q, i32 17, %struct.CustomAttrs* %attrs), !dbg !90 ; line:39 col:7 + call void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32 359, %dx.types.HitObject* %agg.tmp11) #0, !dbg !91 ; line:21 col:3 + %x12 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 0, !dbg !93 ; line:41 col:25 + %23 = load float, float* %x12, align 4, !dbg !93, !tbaa !65 ; line:41 col:25 + %24 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !94 ; line:41 col:3 + %25 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %24), !dbg !94 ; line:41 col:3 + %26 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %25, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef), !dbg !94 ; line:41 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %26, i32 0, float %23), !dbg !94 ; line:41 col:3 + %y13 = getelementptr inbounds %struct.CustomAttrs, %struct.CustomAttrs* %attrs, i32 0, i32 1, !dbg !95 ; line:42 col:25 + %27 = load float, float* %y13, align 4, !dbg !95, !tbaa !65 ; line:42 col:25 + %28 = load %struct.RWByteAddressBuffer, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !dbg !96 ; line:42 col:3 + %29 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, 
%struct.RWByteAddressBuffer)"(i32 0, %struct.RWByteAddressBuffer %28), !dbg !96 ; line:42 col:3 + %30 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32 14, %dx.types.Handle %29, %dx.types.ResourceProperties { i32 4107, i32 0 }, %struct.RWByteAddressBuffer undef), !dbg !96 ; line:42 col:3 + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32 277, %dx.types.Handle %30, i32 4, float %27), !dbg !96 ; line:42 col:3 + %31 = bitcast %struct.CustomAttrs* %attrs to i8*, !dbg !97 ; line:43 col:1 + call void @llvm.lifetime.end(i64 8, i8* %31) #0, !dbg !97 ; line:43 col:1 + %32 = bitcast %struct.RayDesc* %ray to i8*, !dbg !97 ; line:43 col:1 + call void @llvm.lifetime.end(i64 32, i8* %32) #0, !dbg !97 ; line:43 col:1 + %33 = bitcast %"class.RayQuery<5, 0>"* %q to i8*, !dbg !97 ; line:43 col:1 + call void @llvm.lifetime.end(i64 4, i8* %33) #0, !dbg !97 ; line:43 col:1 + ret void, !dbg !97 ; line:43 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*)"(i32, %dx.types.HitObject*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %\22class.RayQuery<5, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32, %"class.RayQuery<5, 0>"*, %dx.types.Handle, i32, i32, %struct.RayDesc*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %\22class.RayQuery<5, 0>\22*)"(i32, %dx.types.HitObject*, %"class.RayQuery<5, 0>"*) #0 + +; Function Attrs: nounwind readonly +declare i32 @"dx.hl.op.ro.i32 (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #2 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RWByteAddressBuffer)"(i32, %struct.RWByteAddressBuffer) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RWByteAddressBuffer) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %\22class.RayQuery<5, 0>\22*, i32, %struct.CustomAttrs*)"(i32, %dx.types.HitObject*, %"class.RayQuery<5, 0>"*, i32, %struct.CustomAttrs*) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, float)"(i32, %dx.types.Handle, i32, float) #0 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!dx.version = !{!2} +!dx.valver = !{!2} +!dx.shaderModel = !{!3} +!dx.typeAnnotations = !{!4, !26} +!dx.entryPoints = !{!30} 
+!dx.fnprops = !{!42} +!dx.options = !{!43, !44} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{i32 1, i32 9} +!3 = !{!"lib", i32 6, i32 9} +!4 = !{i32 0, %"class.RWStructuredBuffer" undef, !5, %struct.RayDesc undef, !10, %"class.dx::HitObject" undef, !15, %"class.RayQuery<5, 0>" undef, !17, %struct.CustomAttrs undef, !23} +!5 = !{i32 4, !6, !7} +!6 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!7 = !{i32 0, !8} +!8 = !{!9} +!9 = !{i32 0, float undef} +!10 = !{i32 32, !11, !12, !13, !14} +!11 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!12 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!13 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!14 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!15 = !{i32 4, !16} +!16 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!17 = !{i32 4, !18, !19} +!18 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!19 = !{i32 0, !20} +!20 = !{!21, !22} +!21 = !{i32 1, i64 5} +!22 = !{i32 1, i64 0} +!23 = !{i32 8, !24, !25} +!24 = !{i32 6, !"x", i32 3, i32 0, i32 7, i32 9} +!25 = !{i32 6, !"y", i32 3, i32 4, i32 7, i32 9} +!26 = !{i32 1, void ()* @"\01?main@@YAXXZ", !27} +!27 = !{!28} +!28 = !{i32 1, !29, !29} +!29 = !{} +!30 = !{null, !"", null, !31, null} +!31 = !{!32, !35, !40, null} +!32 = !{!33} +!33 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !34} +!34 = !{i32 0, i32 4} +!35 = !{!36, !38, !39} +!36 = !{i32 0, %"class.RWStructuredBuffer"* @"\01?UAV@@3V?$RWStructuredBuffer@M@@A", !"UAV", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !37} +!37 = !{i32 1, i32 4} +!38 = !{i32 1, %struct.RWByteAddressBuffer* @"\01?inbuf@@3URWByteAddressBuffer@@A", !"inbuf", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!39 = !{i32 2, %struct.RWByteAddressBuffer* @"\01?outbuf@@3URWByteAddressBuffer@@A", !"outbuf", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!40 = !{!41} +!41 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!42 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!43 = !{i32 -2147483584} +!44 = !{i32 -1} +!45 = !DILocation(line: 26, column: 3, scope: !46) +!46 = !DISubprogram(name: "main", scope: !47, file: !47, line: 25, type: !48, isLocal: false, isDefinition: true, scopeLine: 25, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!47 = !DIFile(filename: "hitobject_fromrayquery_scalarrepl.hlsl", directory: "") +!48 = !DISubroutineType(types: !29) +!49 = !DILocation(line: 26, column: 78, scope: !46) +!50 = !DILocation(line: 27, column: 3, scope: !46) +!51 = !DILocation(line: 8, column: 8, scope: !52, inlinedAt: !53) +!52 = !DISubprogram(name: "MakeRayDesc", scope: !47, file: !47, line: 6, type: !48, isLocal: false, isDefinition: true, scopeLine: 6, flags: DIFlagPrototyped, isOptimized: false) +!53 = distinct !DILocation(line: 27, column: 17, scope: !46) +!54 = !DILocation(line: 8, column: 15, scope: !52, inlinedAt: !53) +!55 = !{!56, !56, i64 0} +!56 = !{!"omnipotent char", !57, i64 0} +!57 = !{!"Simple C/C++ TBAA"} +!58 = !{!59} +!59 = distinct !{!59, !60, !"\01?MakeRayDesc@@YA?AURayDesc@@XZ: %agg.result"} +!60 = distinct !{!60, !"\01?MakeRayDesc@@YA?AURayDesc@@XZ"} +!61 = !DILocation(line: 9, column: 8, scope: !52, inlinedAt: !53) +!62 = !DILocation(line: 9, column: 18, scope: !52, inlinedAt: !53) +!63 = !DILocation(line: 10, column: 8, scope: !52, 
inlinedAt: !53) +!64 = !DILocation(line: 10, column: 13, scope: !52, inlinedAt: !53) +!65 = !{!66, !66, i64 0} +!66 = !{!"float", !56, i64 0} +!67 = !DILocation(line: 11, column: 8, scope: !52, inlinedAt: !53) +!68 = !DILocation(line: 11, column: 13, scope: !52, inlinedAt: !53) +!69 = !DILocation(line: 28, column: 3, scope: !46) +!70 = !DILocation(line: 30, column: 7, scope: !46) +!71 = !DILocation(line: 21, column: 3, scope: !72, inlinedAt: !73) +!72 = !DISubprogram(name: "Use", scope: !47, file: !47, line: 20, type: !48, isLocal: false, isDefinition: true, scopeLine: 20, flags: DIFlagPrototyped, isOptimized: false) +!73 = distinct !DILocation(line: 30, column: 3, scope: !46) +!74 = !DILocation(line: 32, column: 3, scope: !46) +!75 = !DILocation(line: 33, column: 13, scope: !46) +!76 = !DILocation(line: 33, column: 9, scope: !46) +!77 = !DILocation(line: 33, column: 11, scope: !46) +!78 = !DILocation(line: 34, column: 13, scope: !46) +!79 = !DILocation(line: 34, column: 9, scope: !46) +!80 = !DILocation(line: 34, column: 11, scope: !46) +!81 = !DILocation(line: 35, column: 7, scope: !46) +!82 = !DILocation(line: 21, column: 3, scope: !72, inlinedAt: !83) +!83 = distinct !DILocation(line: 35, column: 3, scope: !46) +!84 = !DILocation(line: 37, column: 13, scope: !46) +!85 = !DILocation(line: 37, column: 9, scope: !46) +!86 = !DILocation(line: 37, column: 11, scope: !46) +!87 = !DILocation(line: 38, column: 13, scope: !46) +!88 = !DILocation(line: 38, column: 9, scope: !46) +!89 = !DILocation(line: 38, column: 11, scope: !46) +!90 = !DILocation(line: 39, column: 7, scope: !46) +!91 = !DILocation(line: 21, column: 3, scope: !72, inlinedAt: !92) +!92 = distinct !DILocation(line: 39, column: 3, scope: !46) +!93 = !DILocation(line: 41, column: 25, scope: !46) +!94 = !DILocation(line: 41, column: 3, scope: !46) +!95 = !DILocation(line: 42, column: 25, scope: !46) +!96 = !DILocation(line: 42, column: 3, scope: !46) +!97 = !DILocation(line: 43, column: 1, scope: !46) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll index 89ee886c2e..78f7271e94 100644 --- a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_make_scalarrepl.ll @@ -33,7 +33,7 @@ entry: %hit = alloca %dx.types.HitObject, align 4 %tmp = alloca %dx.types.HitObject, align 4 %ray = alloca %struct.RayDesc, align 4 -; CHECK-NOT: %{{[^ ]+}} = alloca %struct.RayDesc +; CHECK-NOT: alloca %struct.RayDesc %tmp2 = alloca %dx.types.HitObject, align 4 ; CHECK: %[[HIT0:[^ ]+]] = alloca %dx.types.HitObject, align 4 ; CHECK: %[[HIT1:[^ ]+]] = alloca %dx.types.HitObject, align 4 @@ -69,7 +69,16 @@ entry: ; CHECK-DAG: %[[RDTMIN:[^ ]+]] = load float, float* %[[pRDTMIN]], ; CHECK-DAG: %[[RDD:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDD]], ; CHECK-DAG: %[[RDTMAX:[^ ]+]] = load float, float* %[[pRDTMAX]], -; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 387, %dx.types.HitObject* %[[HIT2]], i32 0, i32 1, <3 x float> %[[RDO]], float %[[RDTMIN]], <3 x float> %[[RDD]], float %[[RDTMAX]]) +; Copy introduced for RayDesc argument +; CHECK-DAG: store <3 x float> %[[RDO]], <3 x float>* %[[pRDO2:[^ ]+]], +; CHECK-DAG: store float %[[RDTMIN]], float* %[[pRDTMIN2:[^ ]+]], +; CHECK-DAG: store <3 x float> %[[RDD]], <3 x float>* %[[pRDD2:[^ ]+]], +; CHECK-DAG: store float %[[RDTMAX]], float* %[[pRDTMAX2:[^ ]+]], 
+; CHECK-DAG: %[[RDO2:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDO2]], +; CHECK-DAG: %[[RDTMIN2:[^ ]+]] = load float, float* %[[pRDTMIN2]], +; CHECK-DAG: %[[RDD2:[^ ]+]] = load <3 x float>, <3 x float>* %[[pRDD2]], +; CHECK-DAG: %[[RDTMAX2:[^ ]+]] = load float, float* %[[pRDTMAX2]], +; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 387, %dx.types.HitObject* %[[HIT2]], i32 0, i32 1, <3 x float> %[[RDO2]], float %[[RDTMIN2]], <3 x float> %[[RDD2]], float %[[RDTMAX2]]) call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32, i32, %struct.RayDesc*)"(i32 387, %dx.types.HitObject* %tmp2, i32 0, i32 1, %struct.RayDesc* %ray), !dbg !31 ; line:45 col:3 %10 = bitcast %dx.types.HitObject* %tmp2 to i8*, !dbg !31 ; line:45 col:3 call void @llvm.lifetime.end(i64 4, i8* %10) #0, !dbg !31 ; line:45 col:3 diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll new file mode 100644 index 0000000000..fa22ee5744 --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/hitobject_traceinvoke_scalarrepl.ll @@ -0,0 +1,198 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/HitObject/hitobject_traceinvoke.hlsl + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%"class.RWStructuredBuffer" = type { float } +%ConstantBuffer = type opaque +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%struct.Payload = type { <3 x float> } +%dx.types.HitObject = type { i8* } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.dx::HitObject" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"\01?UAV@@3V?$RWStructuredBuffer@M@@A" = external global %"class.RWStructuredBuffer", align 4 +@"$Globals" = external constant %ConstantBuffer + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +entry: + %rayDesc = alloca %struct.RayDesc, align 4 + %pld = alloca %struct.Payload, align 4 + + ; CHECK: %[[HITOBJ:[^ ,]+]] = alloca %dx.types.HitObject, align 4 + + %hit = alloca %dx.types.HitObject, align 4 + + %0 = bitcast %struct.RayDesc* %rayDesc to i8*, !dbg !37 ; line:82 col:3 + call void @llvm.lifetime.start(i64 32, i8* %0) #0, !dbg !37 ; line:82 col:3 + + ; Init RayDesc. 
+ ; CHECK-DAG: store <3 x float> , <3 x float>* %[[ORIGIN_P0:[^ ,]+]], align 4 + ; CHECK-DAG: store float 3.000000e+00, float* %[[TMIN_P0:[^ ,]+]], align 4 + ; CHECK-DAG: store <3 x float> , <3 x float>* %[[DIRECTION_P0:[^ ,]+]], align 4 + ; CHECK-DAG: store float 7.000000e+00, float* %[[TMAX_P0:[^ ,]+]], align 4 + + %Origin = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 0, !dbg !41 ; line:83 col:11 + store <3 x float> , <3 x float>* %Origin, align 4, !dbg !42, !tbaa !43 ; line:83 col:18 + %TMin = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 1, !dbg !46 ; line:84 col:11 + store float 3.000000e+00, float* %TMin, align 4, !dbg !47, !tbaa !48 ; line:84 col:16 + %Direction = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 2, !dbg !50 ; line:85 col:11 + store <3 x float> , <3 x float>* %Direction, align 4, !dbg !51, !tbaa !43 ; line:85 col:21 + %TMax = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %rayDesc, i32 0, i32 3, !dbg !52 ; line:86 col:11 + store float 7.000000e+00, float* %TMax, align 4, !dbg !53, !tbaa !48 ; line:86 col:16 + + %1 = bitcast %struct.Payload* %pld to i8*, !dbg !54 ; line:88 col:3 + call void @llvm.lifetime.start(i64 12, i8* %1) #0, !dbg !54 ; line:88 col:3 + %dummy = getelementptr inbounds %struct.Payload, %struct.Payload* %pld, i32 0, i32 0, !dbg !55 ; line:89 col:7 + store <3 x float> , <3 x float>* %dummy, align 4, !dbg !56, !tbaa !43 ; line:89 col:13 + %2 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !57 ; line:91 col:3 + call void @llvm.lifetime.start(i64 4, i8* %2) #0, !dbg !57 ; line:91 col:3 + %3 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !58 ; line:91 col:23 + %4 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %3), !dbg !58 ; line:91 col:23 + + ; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) + + %5 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %4, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !58 ; line:91 col:23 + + ; Copy RayDesc. + ; CHECK-DAG: %[[ORIGIN_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIGIN_P0]] + ; CHECK-DAG: store <3 x float> %[[ORIGIN_L0]], <3 x float>* %[[ORIGIN_P1:[^ ,]+]] + ; CHECK-DAG: %[[TMIN_L0:[^ ,]+]] = load float, float* %[[TMIN_P0]] + ; CHECK-DAG: store float %[[TMIN_L0]], float* %[[TMIN_P1:[^ ,]+]] + ; CHECK-DAG: %[[DIRECTION_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P0]] + ; CHECK-DAG: store <3 x float> %[[DIRECTION_L0]], <3 x float>* %[[DIRECTION_P1:[^ ,]+]] + ; CHECK-DAG: %[[TMAX_L0:[^ ,]+]] = load float, float* %[[TMAX_P0]] + ; CHECK-DAG: store float %[[TMAX_L0]], float* %[[TMAX_P1:[^ ,]+]] + + ; Load RayDesc. 
+ ; CHECK-DAG: %[[ORIGIN_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIGIN_P1]] + ; CHECK-DAG: %[[TMIN_L1:[^ ,]+]] = load float, float* %[[TMIN_P1]] + ; CHECK-DAG: %[[DIRECTION_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_P1]] + ; CHECK-DAG: %[[TMAX_L1:[^ ,]+]] = load float, float* %[[TMAX_P1]] + + ; RayDesc is scalar replaced in HL op for dx::HitObject::TraceRay. + ; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 389, %dx.types.HitObject* %[[HITOBJ]], %dx.types.Handle %[[RTAS]], i32 513, i32 1, i32 2, i32 4, i32 0, <3 x float> %[[ORIGIN_L1]], float %[[TMIN_L1]], <3 x float> %[[DIRECTION_L1]], float %[[TMAX_L1]], %struct.Payload* %[[PLD_P0:[^ ,]+]]) + + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32 389, %dx.types.HitObject* %hit, %dx.types.Handle %5, i32 513, i32 1, i32 2, i32 4, i32 0, %struct.RayDesc* %rayDesc, %struct.Payload* %pld), !dbg !58 ; line:91 col:23 + + ; Copy payload. + ; CHECK: %[[GEP_PLD_P0:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0]], i32 0, i32 0 + ; CHECK: %[[PLD_L0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[GEP_PLD_P0]] + ; CHECK: store <3 x float> %[[PLD_L0]], <3 x float>* %[[PLD_M0_P0:[^ ,]+]] + ; CHECK: %[[GEP_PLD_P1:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P1:[^ ,]+]], i32 0, i32 0 + ; CHECK: [[PLD_L1:[^ ,]+]] = load <3 x float>, <3 x float>* %[[PLD_M0_P0]] + ; CHECK: store <3 x float> [[PLD_L1]], <3 x float>* %[[GEP_PLD_P1]] + + ; dx::HitObject::Invoke + ; CHECK: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %[[HITOBJ]], %struct.Payload* %[[PLD_P1]]) + + call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.Payload*)"(i32 382, %dx.types.HitObject* %hit, %struct.Payload* %pld), !dbg !59 ; line:101 col:3 + + %6 = bitcast %dx.types.HitObject* %hit to i8*, !dbg !60 ; line:102 col:1 + call void @llvm.lifetime.end(i64 4, i8* %6) #0, !dbg !60 ; line:102 col:1 + %7 = bitcast %struct.Payload* %pld to i8*, !dbg !60 ; line:102 col:1 + call void @llvm.lifetime.end(i64 12, i8* %7) #0, !dbg !60 ; line:102 col:1 + %8 = bitcast %struct.RayDesc* %rayDesc to i8*, !dbg !60 ; line:102 col:1 + call void @llvm.lifetime.end(i64 32, i8* %8) #0, !dbg !60 ; line:102 col:1 + ret void, !dbg !60 ; line:102 col:1 +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32, %dx.types.HitObject*, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, 
%dx.types.HitObject*, %struct.Payload*)"(i32, %dx.types.HitObject*, %struct.Payload*) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!3} +!dx.shaderModel = !{!4} +!dx.typeAnnotations = !{!5, !20} +!dx.entryPoints = !{!24} +!dx.fnprops = !{!34} +!dx.options = !{!35, !36} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4928 (ser_hlslattributes_patch, 937c16cc6)"} +!3 = !{i32 1, i32 9} +!4 = !{!"lib", i32 6, i32 9} +!5 = !{i32 0, %"class.RWStructuredBuffer" undef, !6, %struct.RayDesc undef, !11, %struct.Payload undef, !16, %"class.dx::HitObject" undef, !18} +!6 = !{i32 4, !7, !8} +!7 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 9} +!8 = !{i32 0, !9} +!9 = !{!10} +!10 = !{i32 0, float undef} +!11 = !{i32 32, !12, !13, !14, !15} +!12 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!13 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!14 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9, i32 13, i32 3} +!15 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!16 = !{i32 12, !17} +!17 = !{i32 6, !"dummy", i32 3, i32 0, i32 7, i32 9, i32 13, i32 3} +!18 = !{i32 4, !19} +!19 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 4} +!20 = !{i32 1, void ()* @"\01?main@@YAXXZ", !21} +!21 = !{!22} +!22 = !{i32 1, !23, !23} +!23 = !{} +!24 = !{null, !"", null, !25, null} +!25 = !{!26, !29, !32, null} +!26 = !{!27} +!27 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !28} +!28 = !{i32 0, i32 4} +!29 = !{!30} +!30 = !{i32 0, %"class.RWStructuredBuffer"* @"\01?UAV@@3V?$RWStructuredBuffer@M@@A", !"UAV", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !31} +!31 = !{i32 1, i32 4} +!32 = !{!33} +!33 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!34 = !{void ()* @"\01?main@@YAXXZ", i32 7} +!35 = !{i32 -2147483584} +!36 = !{i32 -1} +!37 = !DILocation(line: 82, column: 3, scope: !38) +!38 = !DISubprogram(name: "main", scope: !39, file: !39, line: 81, type: !40, isLocal: false, isDefinition: true, scopeLine: 81, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @"\01?main@@YAXXZ") +!39 = !DIFile(filename: "D:\5Cgit\5Cdxc\5Cmain\5Ctools\5Cclang\5Ctest\5CCodeGenDXIL\5Chlsl\5Cobjects\5CHitObject\5Chitobject_traceinvoke.hlsl", directory: "") +!40 = !DISubroutineType(types: !23) +!41 = !DILocation(line: 83, column: 11, scope: !38) +!42 = !DILocation(line: 83, column: 18, scope: !38) +!43 = !{!44, !44, i64 0} +!44 = !{!"omnipotent char", !45, i64 0} +!45 = !{!"Simple C/C++ TBAA"} +!46 = !DILocation(line: 84, column: 11, scope: !38) +!47 = !DILocation(line: 84, column: 16, scope: !38) +!48 = !{!49, !49, i64 0} +!49 = !{!"float", !44, i64 0} +!50 = !DILocation(line: 85, column: 11, scope: !38) +!51 = !DILocation(line: 85, column: 21, scope: !38) +!52 = !DILocation(line: 86, column: 11, scope: !38) +!53 = !DILocation(line: 86, column: 16, scope: !38) +!54 = !DILocation(line: 88, column: 3, scope: !38) +!55 = !DILocation(line: 89, column: 7, scope: !38) +!56 = !DILocation(line: 89, column: 13, scope: !38) +!57 = !DILocation(line: 91, column: 3, scope: !38) +!58 = !DILocation(line: 91, column: 23, scope: !38) +!59 = !DILocation(line: 101, column: 3, scope: !38) +!60 = !DILocation(line: 102, column: 1, scope: !38) diff --git 
a/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll new file mode 100644 index 0000000000..59551a7eb4 --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/traceray_scalarrepl.ll @@ -0,0 +1,182 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%"$Globals" = type { i32, i32, i32, i32, i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%struct.Payload = type { <2 x float>, <3 x i32> } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?Acc@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"\01?RayFlags@@3IB" = external constant i32, align 4 +@"\01?InstanceInclusionMask@@3IB" = external constant i32, align 4 +@"\01?RayContributionToHitGroupIndex@@3IB" = external constant i32, align 4 +@"\01?MultiplierForGeometryContributionToHitGroupIndex@@3IB" = external constant i32, align 4 +@"\01?MissShaderIndex@@3IB" = external constant i32, align 4 +@"$Globals" = external constant %"$Globals" + +; CHECK: define <4 x float> @" +; CHECK-SAME: ?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z"(<2 x float>* noalias dereferenceable(8) %f2, %struct.RayDesc* %Ray, %struct.Payload* noalias %p) + +; Function Attrs: nounwind +define <4 x float> @"\01?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z"(<2 x float>* noalias dereferenceable(8) %f2, %struct.RayDesc* %Ray, %struct.Payload* noalias %p) #0 { +entry: + + ; Copy Payload fields (PLD_F0, PLD_F1) to local allocas: + ; CHECK: %[[GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %p, i32 0, i32 0 + ; CHECK: %[[LOAD:[^ ,]+]] = load <2 x float>, <2 x float>* %[[GEP]] + ; CHECK: store <2 x float> %[[LOAD]], <2 x float>* %[[PLD_F0:[^ ,]+]] + ; CHECK: %[[GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %p, i32 0, i32 1 + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x i32>, <3 x i32>* %[[GEP]] + ; CHECK: store <3 x i32> %[[LOAD]], <3 x i32>* %[[PLD_F1:[^ ,]+]] + + %0 = alloca %struct.RayDesc, !dbg !39 ; line:22 col:61 + %1 = bitcast %struct.RayDesc* %0 to i8*, !dbg !39 ; line:22 col:61 + %2 = bitcast %struct.RayDesc* %Ray to i8*, !dbg !39 ; line:22 col:61 + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 32, i32 1, i1 false), !dbg !39 ; line:22 col:61 + %3 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0), !dbg !39 ; line:22 col:61 + %4 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 13, i32 20 }, %"$Globals" undef), !dbg !39 ; line:22 col:61 + %5 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %4, i32 0), !dbg !39 ; line:22 col:61 + %6 = getelementptr inbounds %"$Globals", %"$Globals"* %5, i32 0, i32 0, !dbg !39 ; line:22 col:61 + %7 = getelementptr inbounds %"$Globals", %"$Globals"* %5, i32 0, i32 1, !dbg !39 ; line:22 col:61 + %8 = getelementptr inbounds %"$Globals", %"$Globals"* %5, i32 0, i32 2, !dbg !39 ; line:22 col:61 + %9 = getelementptr inbounds 
%"$Globals", %"$Globals"* %5, i32 0, i32 3, !dbg !39 ; line:22 col:61 + %10 = getelementptr inbounds %"$Globals", %"$Globals"* %5, i32 0, i32 4, !dbg !39 ; line:22 col:61 + %11 = load i32, i32* %10, align 4, !dbg !39, !tbaa !43 ; line:22 col:61 + %12 = load i32, i32* %9, align 4, !dbg !47, !tbaa !43 ; line:22 col:12 + %13 = load i32, i32* %8, align 4, !dbg !48, !tbaa !43 ; line:21 col:12 + %14 = load i32, i32* %7, align 4, !dbg !49, !tbaa !43 ; line:20 col:25 + %15 = load i32, i32* %6, align 4, !dbg !50, !tbaa !43 ; line:20 col:16 + %16 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?Acc@@3URaytracingAccelerationStructure@@A", !dbg !51 ; line:20 col:3 + %17 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %16), !dbg !51 ; line:20 col:3 + + ; CHECK: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) + %18 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %17, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !51 ; line:20 col:3 + + ; Copy RayDesc fields (Origin, TMin, Direction, TMax) to local allocas: + ; CHECK: %[[RAY_ORIGIN_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 0 + ; CHECK: %[[RAY_ORIGIN_LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RAY_ORIGIN_GEP]] + ; CHECK: store <3 x float> %[[RAY_ORIGIN_LOAD]], <3 x float>* %[[RAY_ORIGIN_P0:[^ ,]+]] + ; CHECK: %[[TMIN_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 1 + ; CHECK: %[[TMIN_LOAD:[^ ,]+]] = load float, float* %[[TMIN_GEP]] + ; CHECK: store float %[[TMIN_LOAD]], float* %[[TMIN_P0:[^ ,]+]] + ; CHECK: %[[DIRECTION_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 2 + ; CHECK: %[[DIRECTION_LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIRECTION_GEP]] + ; CHECK: store <3 x float> %[[DIRECTION_LOAD]], <3 x float>* %[[DIRECTION_P0:[^ ,]+]] + ; CHECK: %[[TMAX_GEP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %Ray, i32 0, i32 3 + ; CHECK: %[[TMAX_LOAD:[^ ,]+]] = load float, float* %[[TMAX_GEP]] + ; CHECK: store float %[[TMAX_LOAD]], float* %[[TMAX_P0:[^ ,]+]] + + ; Copy Payload fields into payload struct for call: + ; CHECK: %[[PLD_F0_GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0:[^ ,]+]], i32 0, i32 0 + ; CHECK: %[[PLD_F0_LOAD:[^ ,]+]] = load <2 x float>, <2 x float>* %[[PLD_F0]] + ; CHECK: store <2 x float> %[[PLD_F0_LOAD]], <2 x float>* %[[PLD_F0_GEP]] + ; CHECK: %[[PLD_F1_GEP:[^ ,]+]] = getelementptr inbounds %struct.Payload, %struct.Payload* %[[PLD_P0]], i32 0, i32 1 + ; CHECK: %[[PLD_F1_LOAD:[^ ,]+]] = load <3 x i32>, <3 x i32>* %[[PLD_F1]] + ; CHECK: store <3 x i32> %[[PLD_F1_LOAD]], <3 x i32>* %[[PLD_F1_GEP]] + + ; Load RayDesc fields: + ; CHECK: %[[RAY_ORIGIN_LOAD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RAY_ORIGIN_P0]] + ; CHECK: %[[TMIN_LOAD2:[^ ,]+]] = load float, float* %[[TMIN_P0]] + ; CHECK: %[[DIRECTION_LOAD2:[^ ,]+]] = load <3 x float>, <3 x 
float>* %[[DIRECTION_P0]] + ; CHECK: %[[TMAX_LOAD2:[^ ,]+]] = load float, float* %[[TMAX_P0]] + + ; call TraceRay with the local allocas: + ; CHECK: call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32, i32, i32, i32, <3 x float>, float, <3 x float>, float, %struct.Payload*)"(i32 69, %dx.types.Handle %[[RTAS]], i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, i32 %{{[^ ,]+}}, <3 x float> %[[RAY_ORIGIN_LOAD2]], float %[[TMIN_LOAD2]], <3 x float> %[[DIRECTION_LOAD2]], float %[[TMAX_LOAD2]], %struct.Payload* %[[PLD_P0]]) + + call void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32 69, %dx.types.Handle %18, i32 %15, i32 %14, i32 %13, i32 %12, i32 %11, %struct.RayDesc* %0, %struct.Payload* %p), !dbg !51 ; line:20 col:3 + + ret <4 x float> , !dbg !52 ; line:24 col:4 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*)"(i32, %dx.types.Handle, i32, i32, i32, i32, i32, %struct.RayDesc*, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32, %"$Globals"*, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"$Globals") #1 + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !21} +!dx.entryPoints = !{!30} +!dx.fnprops = !{} +!dx.options = !{!37, !38} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.4928 (ser_hlslattributes_patch, 937c16cc6)"} +!3 = !{i32 1, i32 3} +!4 = !{i32 1, i32 9} +!5 = !{!"lib", i32 6, i32 3} +!6 = !{i32 0, %struct.RayDesc undef, !7, %struct.Payload undef, !12, %"$Globals" undef, !15} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 28, !13, !14} +!13 = !{i32 6, !"t", i32 3, i32 0, i32 7, i32 9} +!14 = !{i32 6, !"t2", i32 3, i32 16, i32 7, i32 4} +!15 = !{i32 20, !16, !17, !18, !19, !20} +!16 = !{i32 6, !"RayFlags", i32 3, i32 0, i32 7, i32 5} +!17 = !{i32 6, !"InstanceInclusionMask", i32 3, i32 4, i32 7, i32 5} +!18 = !{i32 6, !"RayContributionToHitGroupIndex", i32 3, i32 
8, i32 7, i32 5} +!19 = !{i32 6, !"MultiplierForGeometryContributionToHitGroupIndex", i32 3, i32 12, i32 7, i32 5} +!20 = !{i32 6, !"MissShaderIndex", i32 3, i32 16, i32 7, i32 5} +!21 = !{i32 1, <4 x float> (<2 x float>*, %struct.RayDesc*, %struct.Payload*)* @"\01?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z", !22} +!22 = !{!23, !26, !27, !29} +!23 = !{i32 1, !24, !25} +!24 = !{i32 7, i32 9} +!25 = !{} +!26 = !{i32 2, !24, !25} +!27 = !{i32 0, !28, !25} +!28 = !{i32 4, !"R"} +!29 = !{i32 2, !25, !25} +!30 = !{null, !"", null, !31, null} +!31 = !{!32, null, !35, null} +!32 = !{!33} +!33 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?Acc@@3URaytracingAccelerationStructure@@A", !"Acc", i32 -1, i32 -1, i32 1, i32 16, i32 0, !34} +!34 = !{i32 0, i32 4} +!35 = !{!36} +!36 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 20, null} +!37 = !{i32 -2147483584} +!38 = !{i32 11} +!39 = !DILocation(line: 22, column: 61, scope: !40) +!40 = !DISubprogram(name: "emit", scope: !41, file: !41, line: 19, type: !42, isLocal: false, isDefinition: true, scopeLine: 19, flags: DIFlagPrototyped, isOptimized: false, function: <4 x float> (<2 x float>*, %struct.RayDesc*, %struct.Payload*)* @"\01?emit@@YA?AV?$vector@M$03@@AIAV?$vector@M$01@@URayDesc@@UPayload@@@Z") +!41 = !DIFile(filename: "D:\5Cgit\5Cdxc\5Cmain\5Ctools\5Cclang\5Ctest\5CHLSLFileCheck\5Cshader_targets\5Craytracing\5Craytracing_traceray.hlsl", directory: "") +!42 = !DISubroutineType(types: !25) +!43 = !{!44, !44, i64 0} +!44 = !{!"int", !45, i64 0} +!45 = !{!"omnipotent char", !46, i64 0} +!46 = !{!"Simple C/C++ TBAA"} +!47 = !DILocation(line: 22, column: 12, scope: !40) +!48 = !DILocation(line: 21, column: 12, scope: !40) +!49 = !DILocation(line: 20, column: 25, scope: !40) +!50 = !DILocation(line: 20, column: 16, scope: !40) +!51 = !DILocation(line: 20, column: 3, scope: !40) +!52 = !DILocation(line: 24, column: 4, scope: !40) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll new file mode 100644 index 0000000000..c01ec797bb --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_cb_raydesc_scalarrepl.ll @@ -0,0 +1,154 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"$Globals" = type { %struct.RayDesc } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.RayQuery<513, 0>" = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"\01?rayDesc@@3URayDesc@@B" = external constant %struct.RayDesc, align 4 +@"$Globals" = external constant %"$Globals" + +; Function Attrs: nounwind +define void @main() #0 { +entry: + %0 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32 0, %"$Globals"* @"$Globals", i32 0) + + ; Capture CB, RayDesc ptr from CB, RTAS, and init RayQuery + ; CHECK-DAG: %[[CB_H:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) + + %1 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32 14, %dx.types.Handle %0, %dx.types.ResourceProperties { i32 13, i32 32 }, %"$Globals" undef) + + ; CHECK-DAG: %[[CB_PTR:[^ ,]+]] = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %[[CB_H]], i32 0) + + %2 = call %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32 6, %dx.types.Handle %1, i32 0) + + ; CHECK-DAG: %[[RAYDESC_PTR:[^ ,]+]] = getelementptr inbounds %"$Globals", %"$Globals"* %[[CB_PTR]], i32 0, i32 0 + + %3 = getelementptr inbounds %"$Globals", %"$Globals"* %2, i32 0, i32 0 + + ; CHECK-DAG: %[[RQ0:[^ ,]+]] = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0) + ; CHECK-DAG: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] + + %rayQuery = alloca %"class.RayQuery<513, 0>", align 4 + %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !34 ; line:12 col:71 + %4 = getelementptr inbounds %"class.RayQuery<513, 0>", %"class.RayQuery<513, 0>"* %rayQuery, i32 0, i32 0, !dbg !34 ; line:12 col:71 + store i32 %rayQuery1, i32* %4, !dbg !34 ; line:12 col:71 + + %5 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !38 ; line:13 col:3 + %6 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %5), !dbg !38 ; line:13 col:3 + + ; CHECK-DAG: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) + + %7 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %6, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !38 ; line:13 col:3 + + ; Load RayDesc fields from CB to local copy + ; CHECK-DAG: %[[ORIG_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 0 + ; CHECK-DAG: %[[ORIG_LD_CB:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIG_CBP]] + ; CHECK-DAG: store <3 x float> %[[ORIG_LD_CB]], <3 x float>* %[[ORIG_P0:[^ ,]+]] + ; CHECK-DAG: %[[TMIN_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 1 + ; CHECK-DAG: %[[TMIN_LD_CB:[^ ,]+]] = load float, float* %[[TMIN_CBP]] + ; CHECK-DAG: store float %[[TMIN_LD_CB]], float* %[[TMIN_P0:[^ ,]+]] + ; CHECK-DAG: %[[DIR_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 2 + ; CHECK-DAG: %[[DIR_LD_CB:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIR_CBP]] + ; CHECK-DAG: store <3 x float> %[[DIR_LD_CB]], <3 x float>* %[[DIR_P0:[^ ,]+]] + ; CHECK-DAG: %[[TMAX_CBP:[^ ,]+]] = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* %[[RAYDESC_PTR]], i32 0, i32 3 + ; CHECK-DAG: %[[TMAX_LD_CB:[^ ,]+]] = load float, float* %[[TMAX_CBP]] + ; CHECK-DAG: store float 
%[[TMAX_LD_CB]], float* %[[TMAX_P0:[^ ,]+]] + + ; Load RayDesc fields from local copy + ; CHECK-DAG: %[[ORIG:[^ ,]+]] = load <3 x float>, <3 x float>* %[[ORIG_P0]] + ; CHECK-DAG: %[[TMIN:[^ ,]+]] = load float, float* %[[TMIN_P0]] + ; CHECK-DAG: %[[DIR:[^ ,]+]] = load <3 x float>, <3 x float>* %[[DIR_P0]] + ; CHECK-DAG: %[[TMAX:[^ ,]+]] = load float, float* %[[TMAX_P0]] + ; CHECK-DAG: %[[RQ:[^ ,]+]] = load i32, i32* %[[RQ_P0]] + + ; Call TraceRayInline + ; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, <3 x float> %[[ORIG]], float %[[TMIN]], <3 x float> %[[DIR]], float %[[TMAX]]) + + call void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32 325, %"class.RayQuery<513, 0>"* %rayQuery, %dx.types.Handle %7, i32 1, i32 2, %struct.RayDesc* %3), !dbg !38 ; line:13 col:3 + ret void, !dbg !39 ; line:14 col:1 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32, %"class.RayQuery<513, 0>"*, %dx.types.Handle, i32, i32, %struct.RayDesc*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %"$Globals"* @"dx.hl.subscript.cb.rn.%\22$Globals\22* (i32, %dx.types.Handle, i32)"(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %\22$Globals\22*, i32)"(i32, %"$Globals"*, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %\22$Globals\22)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %"$Globals") #1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !20} +!dx.entryPoints = !{!24} +!dx.fnprops = !{!31} +!dx.options = !{!32, !33} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14861 (main, 33bc44a3d)"} +!3 = !{i32 1, i32 5} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 5} +!6 = !{i32 0, %struct.RayDesc undef, !7, %"class.RayQuery<513, 0>" undef, !12, %"$Globals" undef, !18} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 4, !13, !14} +!13 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!14 = !{i32 0, !15} +!15 = !{!16, !17} +!16 = !{i32 1, i64 513} +!17 = !{i32 1, i64 0} +!18 = !{i32 32, !19} +!19 = !{i32 6, !"rayDesc", i32 3, i32 0} +!20 = !{i32 1, void ()* @main, !21} 
+!21 = !{!22} +!22 = !{i32 1, !23, !23} +!23 = !{} +!24 = !{void ()* @main, !"main", null, !25, null} +!25 = !{!26, null, !29, null} +!26 = !{!27} +!27 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !28} +!28 = !{i32 0, i32 4} +!29 = !{!30} +!30 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 32, null} +!31 = !{void ()* @main, i32 1} +!32 = !{i32 64} +!33 = !{i32 -1} +!34 = !DILocation(line: 12, column: 71, scope: !35) +!35 = !DISubprogram(name: "main", scope: !36, file: !36, line: 11, type: !37, isLocal: false, isDefinition: true, scopeLine: 11, flags: DIFlagPrototyped, isOptimized: false, function: void ()* @main) +!36 = !DIFile(filename: "/home/texr/git/dxc/main/tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline_cb_raydesc.hlsl", directory: "") +!37 = !DISubroutineType(types: !23) +!38 = !DILocation(line: 13, column: 3, scope: !35) +!39 = !DILocation(line: 14, column: 1, scope: !35) diff --git a/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll new file mode 100644 index 0000000000..ee76872441 --- /dev/null +++ b/tools/clang/test/DXC/Passes/ScalarReplHLSL/tracerayinline_scalarrepl.ll @@ -0,0 +1,155 @@ +; RUN: %dxopt %s -hlsl-passes-resume -scalarrepl-param-hlsl -S | FileCheck %s + +; Based on tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl, +; with call to DoTrace commented out. + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.RaytracingAccelerationStructure = type { i32 } +%ConstantBuffer = type opaque +%struct.RayDesc = type { <3 x float>, float, <3 x float>, float } +%"class.RayQuery<513, 0>" = type { i32 } +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external global %struct.RaytracingAccelerationStructure, align 4 +@"$Globals" = external constant %ConstantBuffer + +; CHECK: define void @main(float* noalias, <3 x float>, float, <3 x float>, float) + +; Function Attrs: nounwind +define float @main(%struct.RayDesc* %rayDesc) #0 { +entry: + %0 = alloca %struct.RayDesc + + ; Copy flattened RayDesc input to main function + ; RayDesc fields: %1: Origin, %2: TMin, %3: Direction, %4: TMax + ; CHECK: store float %4, float* %[[RD3_P0:[^ ,]+]] + ; CHECK: store <3 x float> %3, <3 x float>* %[[RD2_P0:[^ ,]+]] + ; CHECK: store float %2, float* %[[RD1_P0:[^ ,]+]] + ; CHECK: store <3 x float> %1, <3 x float>* %[[RD0_P0:[^ ,]+]] + + ; Copy RayDesc fields again + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P0]] + ; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD0_P1:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD1_P0]] + ; CHECK: store float %[[LOAD]], float* %[[RD1_P1:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P0]] + ; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD2_P1:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD3_P0]] + ; CHECK: store float %[[LOAD]], float* %[[RD3_P1:[^ ,]+]] + + %1 = bitcast %struct.RayDesc* %0 to i8* + %2 = bitcast %struct.RayDesc* %rayDesc to i8* + call void @llvm.memcpy.p0i8.p0i8.i64(i8* %1, i8* %2, i64 32, i32 1, i1 false) + + ; Capture RayQuery ptr and RTAS handle + ; CHECK: %[[RQ0:[^ ]+]] = call i32 @"dx.hl.op..i32 (i32, 
i32, i32)"(i32 4, i32 513, i32 0) + ; CHECK: store i32 %[[RQ0]], i32* %[[RQ_P0:[^ ,]+]] + ; CHECK: %[[RTAS:[^ ,]+]] = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %{{[^ ,]+}}, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef) + + %rayQuery = alloca %"class.RayQuery<513, 0>", align 4 + %rayQuery1 = call i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32 4, i32 513, i32 0), !dbg !35 ; line:15 col:71 + %3 = getelementptr inbounds %"class.RayQuery<513, 0>", %"class.RayQuery<513, 0>"* %rayQuery, i32 0, i32 0, !dbg !35 ; line:15 col:71 + store i32 %rayQuery1, i32* %3, !dbg !35 ; line:15 col:71 + %4 = load %struct.RaytracingAccelerationStructure, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !dbg !39 ; line:17 col:3 + %5 = call %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32 0, %struct.RaytracingAccelerationStructure %4), !dbg !39 ; line:17 col:3 + %6 = call %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32 14, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 16, i32 0 }, %struct.RaytracingAccelerationStructure undef), !dbg !39 ; line:17 col:3 + + ; Copy RayDesc fields again + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P1]] + ; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD0_P2:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD1_P1]] + ; CHECK: store float %[[LOAD]], float* %[[RD1_P2:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P1]] + ; CHECK: store <3 x float> %[[LOAD]], <3 x float>* %[[RD2_P2:[^ ,]+]] + ; CHECK: %[[LOAD:[^ ,]+]] = load float, float* %[[RD3_P1]] + ; CHECK: store float %[[LOAD]], float* %[[RD3_P2:[^ ,]+]] + + ; Load RayDesc fields for TraceRayInline + ; CHECK: %[[RD0:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD0_P2]] + ; CHECK: %[[RD1:[^ ,]+]] = load float, float* %[[RD1_P2]] + ; CHECK: %[[RD2:[^ ,]+]] = load <3 x float>, <3 x float>* %[[RD2_P2]] + ; CHECK: %[[RD3:[^ ,]+]] = load float, float* %[[RD3_P2]] + + ; Load RayQuery + ; CHECK: %[[RQ:[^ ,]+]] = load i32, i32* %[[RQ_P0]] + + ; TraceRayInline call + ; CHECK: call void @"dx.hl.op..void (i32, i32, %dx.types.Handle, i32, i32, <3 x float>, float, <3 x float>, float)"(i32 325, i32 %[[RQ]], %dx.types.Handle %[[RTAS]], i32 1, i32 2, <3 x float> %[[RD0]], float %[[RD1]], <3 x float> %[[RD2]], float %[[RD3]]) + + call void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32 325, %"class.RayQuery<513, 0>"* %rayQuery, %dx.types.Handle %6, i32 1, i32 2, %struct.RayDesc* %0), !dbg !39 ; line:17 col:3 + ret float 0.000000e+00, !dbg !40 ; line:18 col:3 +} + +; Function Attrs: nounwind +declare void @"dx.hl.op..void (i32, %\22class.RayQuery<513, 0>\22*, %dx.types.Handle, i32, i32, %struct.RayDesc*)"(i32, %"class.RayQuery<513, 0>"*, %dx.types.Handle, i32, i32, %struct.RayDesc*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.createhandle..%dx.types.Handle (i32, %struct.RaytracingAccelerationStructure)"(i32, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @"dx.hl.annotatehandle..%dx.types.Handle (i32, %dx.types.Handle, 
%dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure)"(i32, %dx.types.Handle, %dx.types.ResourceProperties, %struct.RaytracingAccelerationStructure) #1 + +; Function Attrs: nounwind +declare i32 @"dx.hl.op..i32 (i32, i32, i32)"(i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @llvm.memcpy.p0i8.p0i8.i64(i8* nocapture, i8* nocapture readonly, i64, i32, i1) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!llvm.module.flags = !{!0} +!pauseresume = !{!1} +!llvm.ident = !{!2} +!dx.version = !{!3} +!dx.valver = !{!4} +!dx.shaderModel = !{!5} +!dx.typeAnnotations = !{!6, !18} +!dx.entryPoints = !{!25} +!dx.fnprops = !{!32} +!dx.options = !{!33, !34} + +!0 = !{i32 2, !"Debug Info Version", i32 3} +!1 = !{!"hlsl-hlemit", !"hlsl-hlensure"} +!2 = !{!"dxc(private) 1.8.0.14861 (main, 33bc44a3d)"} +!3 = !{i32 1, i32 5} +!4 = !{i32 1, i32 9} +!5 = !{!"vs", i32 6, i32 5} +!6 = !{i32 0, %struct.RayDesc undef, !7, %"class.RayQuery<513, 0>" undef, !12} +!7 = !{i32 32, !8, !9, !10, !11} +!8 = !{i32 6, !"Origin", i32 3, i32 0, i32 7, i32 9} +!9 = !{i32 6, !"TMin", i32 3, i32 12, i32 7, i32 9} +!10 = !{i32 6, !"Direction", i32 3, i32 16, i32 7, i32 9} +!11 = !{i32 6, !"TMax", i32 3, i32 28, i32 7, i32 9} +!12 = !{i32 4, !13, !14} +!13 = !{i32 6, !"h", i32 3, i32 0, i32 7, i32 5} +!14 = !{i32 0, !15} +!15 = !{!16, !17} +!16 = !{i32 1, i64 513} +!17 = !{i32 1, i64 0} +!18 = !{i32 1, float (%struct.RayDesc*)* @main, !19} +!19 = !{!20, !23} +!20 = !{i32 1, !21, !22} +!21 = !{i32 4, !"OUT", i32 7, i32 9} +!22 = !{} +!23 = !{i32 0, !24, !22} +!24 = !{i32 4, !"RAYDESC"} +!25 = !{float (%struct.RayDesc*)* @main, !"main", null, !26, null} +!26 = !{!27, null, !30, null} +!27 = !{!28} +!28 = !{i32 0, %struct.RaytracingAccelerationStructure* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !29} +!29 = !{i32 0, i32 4} +!30 = !{!31} +!31 = !{i32 0, %ConstantBuffer* @"$Globals", !"$Globals", i32 0, i32 -1, i32 1, i32 0, null} +!32 = !{float (%struct.RayDesc*)* @main, i32 1} +!33 = !{i32 64} +!34 = !{i32 -1} +!35 = !DILocation(line: 15, column: 71, scope: !36) +!36 = !DISubprogram(name: "main", scope: !37, file: !37, line: 14, type: !38, isLocal: false, isDefinition: true, scopeLine: 14, flags: DIFlagPrototyped, isOptimized: false, function: float (%struct.RayDesc*)* @main) +!37 = !DIFile(filename: "tools/clang/test/CodeGenDXIL/hlsl/objects/RayQuery/tracerayinline.hlsl", directory: "") +!38 = !DISubroutineType(types: !22) +!39 = !DILocation(line: 17, column: 3, scope: !36) +!40 = !DILocation(line: 18, column: 3, scope: !36) diff --git a/tools/clang/test/DXC/deprecated-select-validator.hlsl b/tools/clang/test/DXC/deprecated-select-validator.hlsl new file mode 100644 index 0000000000..2ad3e5199c --- /dev/null +++ b/tools/clang/test/DXC/deprecated-select-validator.hlsl @@ -0,0 +1,14 @@ +// Test that the deprecated option, select-validator, doesn't work. 
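+// The option is no longer recognized at all, so the failure surfaces through dxc's
+// generic unknown-argument diagnostic, which is what the CHECK below matches.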
+// RUN: not %dxc -E main -T vs_6_7 -select-validator internal %s 2>&1 | FileCheck %s + +// CHECK: dxc failed : Unknown argument: '-select-validator' + +float4 main(int loc : SV_StartVertexLocation + , uint loc2 : SV_StartInstanceLocation + ) : SV_Position +{ + float4 r = 0; + r += loc; + r += loc2; + return r; +} diff --git a/tools/clang/test/DXC/dot4add_i8_u8_packed-types.hlsl b/tools/clang/test/DXC/dot4add_i8_u8_packed-types.hlsl new file mode 100644 index 0000000000..53c87bb9c1 --- /dev/null +++ b/tools/clang/test/DXC/dot4add_i8_u8_packed-types.hlsl @@ -0,0 +1,34 @@ +// RUN: %dxc /enable-16bit-types /T cs_6_8 %s | FileCheck %s + +// Compiling this HLSL would fail this assertion in TranslateDot4AddPacked: +// +// DXASSERT( +// !accTy->isVectorTy() && accTy->isIntegerTy(32), +// "otherwise, unexpected vector support in high level intrinsic template"); +// +// Bug was fixed by changing the declarations of dot4add_i8packed and +// dot4add_u8packed in utils/hct/gen_intrin_main.txt to simply write +// out their argument and return types, rather than using the $typeN +// reference syntax. + +// CHECK: call i32 @dx.op.dot4AddPacked.i32{{.*}}Dot4AddI8Packed(acc,a,b) +// CHECK: call i32 @dx.op.dot4AddPacked.i32{{.*}}Dot4AddU8Packed(acc,a,b) +// CHECK: call float @dx.op.dot2AddHalf.f32{{.*}}Dot2AddHalf(acc,ax,ay,bx,by) + +RWByteAddressBuffer buf; + +[numthreads(1, 1, 1)] +void main() +{ + int a = dot4add_i8packed(0, 0, 0); + int b = dot4add_i8packed(0, 0, a); + buf.Store(0, b); + + uint c = dot4add_u8packed(0, 0, 0); + uint d = dot4add_u8packed(0, 0, c); + buf.Store(4, d); + + float e = dot2add(half2(0,0), half2(0,0), 1.0); + float f = dot2add(half2(0,0), half2(0,0), e); + buf.Store(8, f); +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/diagnostics/errors/integer_literal_too_large.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/diagnostics/errors/integer_literal_too_large.hlsl new file mode 100644 index 0000000000..98db6a6f56 --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/diagnostics/errors/integer_literal_too_large.hlsl @@ -0,0 +1,14 @@ +// RUN: %dxc -T lib_6_6 %s | FileCheck %s + +// A diagnostic is generated for an integer literal that is too large to be +// represented by any integer type - an argument indicates whether the text +// contains "signed". That argument was missing in HLSL specific code within +// Sema::ActOnNumericConstant() which resulted in an assert being raised if +// the diagnostic was generated in an assert enabled DXC and a random string +// being inserted in a non-assert enabled DXC. 
+ +// CHECK: integer literal is too large to be represented in any integer type +int a = 98765432109876543210; + +// CHECK: integer literal is too large to be represented in any integer type +uint b = 98765432109876543210U; diff --git a/tools/clang/test/HLSLFileCheck/hlsl/types/struct/struct-annotations.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/types/struct/struct-annotations.hlsl index 5a1b5e43d8..4ffb325c8b 100644 --- a/tools/clang/test/HLSLFileCheck/hlsl/types/struct/struct-annotations.hlsl +++ b/tools/clang/test/HLSLFileCheck/hlsl/types/struct/struct-annotations.hlsl @@ -1,5 +1,5 @@ -// RUN: %dxc -T ps_6_8 -E main -Qkeep_reflect_in_dxil -select-validator internal %s | FileCheck -check-prefix=CHECK68 %s -// RUN: %dxc -T ps_6_7 -E main -Qkeep_reflect_in_dxil -select-validator internal %s | FileCheck -check-prefix=CHECK67 %s +// RUN: %dxc -T ps_6_8 -E main -Qkeep_reflect_in_dxil %s | FileCheck -check-prefix=CHECK68 %s +// RUN: %dxc -T ps_6_7 -E main -Qkeep_reflect_in_dxil %s | FileCheck -check-prefix=CHECK67 %s // Make sure the vector is annotated with vector size (DXIL 1.8 and higher), // matrix is annotated with matrix size and orientation, and scalar does not @@ -47,4 +47,4 @@ StructuredBuffer g_myStruct; float main() : SV_Target { return g_myStruct[0].vec.x + g_myStruct[0].vec.y; -} \ No newline at end of file +} diff --git a/tools/clang/test/HLSLFileCheck/hlsl/workgraph/nested_sv_dispatchgrid.hlsl b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/nested_sv_dispatchgrid.hlsl new file mode 100644 index 0000000000..1da45dae1d --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/hlsl/workgraph/nested_sv_dispatchgrid.hlsl @@ -0,0 +1,130 @@ +// RUN: %dxc -T lib_6_8 %s | FileCheck %s + +// Check that the SV_DispatchGrid DXIL metadata for a node input record is +// generated in cases where: +// node1 - the field with the SV_DispatchGrid semantic is in a nested record +// node2 - the field with the SV_DispatchGrid semantic is in a record field +// node3 - the field with the SV_DispatchGrid semantic is inherited from a base record +// node4 - the field with the SV_DispatchGrid semantic is within a nested record inherited from a base record +// node5 - the field with the SV_DispatchGrid semantic is within a base record of a nested record +// node6 - the field with the SV_DispatchGrid semantic is within a templated base record +// node7 - the field with the SV_DispatchGrid semantic is within a templated base record of a templated record +// node8 - the field with the SV_DispatchGrid semantic has a templated type + +struct Record1 { + struct { + // SV_DispatchGrid is within a nested record + uint3 grid : SV_DispatchGrid; + }; +}; + +[Shader("node")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node1(DispatchNodeInputRecord<Record1> input) {} +// CHECK: {!"node1" +// CHECK: , i32 1, ![[SVDG_1:[0-9]+]] +// CHECK: [[SVDG_1]] = !{i32 0, i32 5, i32 3} + +struct Record2a { + uint u; + uint2 grid : SV_DispatchGrid; +}; + +struct Record2 { + uint a; + // SV_DispatchGrid is within a record field + Record2a b; +}; + +[Shader("node")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node2(DispatchNodeInputRecord<Record2> input) {} +// CHECK: {!"node2" +// CHECK: , i32 1, ![[SVDG_2:[0-9]+]] +// CHECK: [[SVDG_2]] = !{i32 8, i32 5, i32 2} + +struct Record3 : Record2a { + // SV_DispatchGrid is inherited + uint4 n; +}; + +[Shader("node")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node3(DispatchNodeInputRecord<Record3> input) {} +// CHECK: {!"node3" +// CHECK: , i32 1, ![[SVDG_3:[0-9]+]] +// CHECK: 
[[SVDG_3]] = !{i32 4, i32 5, i32 2} + +struct Record4 : Record2 { + // SV_DispatchGrid is in a nested field in a base record + float f; +}; + +[Shader("node")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node4(DispatchNodeInputRecord<Record4> input) {} +// CHECK: {!"node4" +// CHECK: , i32 1, ![[SVDG_2]] + +struct Record5 { + uint4 x; + // SV_DispatchGrid is in a base record of a record field + Record3 r; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node5(DispatchNodeInputRecord<Record5> input) {} +// CHECK: {!"node5" +// CHECK: , i32 1, ![[SVDG_5:[0-9]+]] +// CHECK: [[SVDG_5]] = !{i32 20, i32 5, i32 2} + +template<typename T> +struct Base { + T DG : SV_DispatchGrid; +}; + +struct Derived1 : Base<uint3> { + int4 x; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node6(DispatchNodeInputRecord<Derived1> input) {} +// CHECK: {!"node6" +// CHECK: , i32 1, ![[SVDG_1]] + +template<typename T> +struct Derived2 : Base<T> { + T Y; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node7(DispatchNodeInputRecord<Derived2<uint2> > input) {} +// CHECK: {!"node7" +// CHECK: , i32 1, ![[SVDG_7:[0-9]+]] +// CHECK: [[SVDG_7]] = !{i32 0, i32 5, i32 2} + +template<typename T> +struct Derived3 { + Derived2<T> V; +}; + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NodeMaxDispatchGrid(32,16,1)] +[NumThreads(32,1,1)] +void node8(DispatchNodeInputRecord< Derived3<uint3> > input) {} +// CHECK: {!"node8" +// CHECK: , i32 1, ![[SVDG_1]] diff --git a/tools/clang/test/HLSLFileCheck/infra/auto-dxilver.hlsl b/tools/clang/test/HLSLFileCheck/infra/auto-dxilver.hlsl index 166fa5918d..14ee7f7bf9 100644 --- a/tools/clang/test/HLSLFileCheck/infra/auto-dxilver.hlsl +++ b/tools/clang/test/HLSLFileCheck/infra/auto-dxilver.hlsl @@ -5,14 +5,17 @@ // This should implicitly require dxilver 1.8. // RUN: %dxc -T vs_6_8 -Vd %s | FileCheck %s -// Even though this is using -Vd, the validator version is set by the available -// validator. If that isn't version 1.8 or above, we'll see an error. +// Even though this is using -Vd, the validator version being checked is the internal +// validator's version. If a pre-DXIL-1.8 DXC was used to run this test, we expect failure, +// since the internal validator will be the same version as the older DXC. // The implicit dxilver logic should not skip the check when -Vd is used. // CHECK-NOT: error: validator version {{.*}} does not support target profile. // RUN: %dxc -T vs_6_0 -validator-version 1.8 %s | FileCheck %s // Even though target is 6.0, the explicit -validator-version should add an -// implicit dxilver 1.8 requirement. +// implicit dxilver 1.8 requirement. The requirement should pass for DXCs that +// support DXIL 1.8 or newer, since the internal validator's version will then +// be sufficiently new for this check. // CHECK-NOT: error: The module cannot be validated by the version of the validator currently attached. // This error would occur if run against wrong compiler. @@ -21,8 +24,6 @@ // Catch any other unexpected error cases. // CHECK-NOT: error -// RUN: %dxc -T vs_6_8 -select-validator internal %s | FileCheck %s -// This should always be run, and always succeed. 
// CHECK: define void @main() void main() {} diff --git a/tools/clang/test/HLSLFileCheck/pix/AnnotateVirtualRegs-Raygen.hlsl b/tools/clang/test/HLSLFileCheck/pix/AnnotateVirtualRegs-Raygen.hlsl deleted file mode 100644 index b9670bdaba..0000000000 --- a/tools/clang/test/HLSLFileCheck/pix/AnnotateVirtualRegs-Raygen.hlsl +++ /dev/null @@ -1,36 +0,0 @@ -// RUN: %dxc -Od -T lib_6_6 %s | %opt -S -dxil-annotate-with-virtual-regs | FileCheck %s - - -/* To run locally run: -%dxc -Od -T lib_6_6 %s -Fc %t.ll -%opt %t.ll -S -dxil-annotate-with-virtual-regs | FileCheck %s -*/ - -RaytracingAccelerationStructure scene : register(t0); - -struct RayPayload -{ - int3 color; -}; - -[shader("raygeneration")] -void ENTRY() -{ - RayDesc ray = {{0,0,0}, {0,0,1}, 0.05, 1000.0}; - RayPayload pld; - TraceRay(scene, 0 /*rayFlags*/, 0xFF /*rayMask*/, 0 /*sbtRecordOffset*/, 1 /*sbtRecordStride*/, 0 /*missIndex*/, ray, pld); -} - -// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* {{.*}}, i32 0, i32 0, !pix-dxil-reg [[RDGEP:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-reg [[NothGEP:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* {{.*}}, i32 0, i32 1, !pix-dxil-reg [[RDGEP2:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-reg [[NothGEP2:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = getelementptr inbounds %struct.RayDesc, %struct.RayDesc* {{.*}}, i32 0, i32 2, !pix-dxil-reg [[RDGEP3:![0-9]+]], !pix-dxil-inst-num {{.*}} -// CHECK: {{.*}} = load i32, i32* getelementptr inbounds ([1 x i32], [1 x i32]* @dx.nothing.a, i32 0, i32 0), !pix-dxil-reg [[NothGEP3:![0-9]+]], !pix-dxil-inst-num {{.*}} - -// CHECK-DAG: [[RDGEP]] = !{i32 0, i32 0} -// CHECK-DAG: [[NothGEP]] = !{i32 0, i32 11} -// CHECK-DAG: [[RDGEP2]] = !{i32 0, i32 3} -// CHECK-DAG: [[NothGEP2]] = !{i32 0, i32 12} -// CHECK-DAG: [[RDGEP3]] = !{i32 0, i32 4} -// CHECK-DAG: [[NothGEP3]] = !{i32 0, i32 13} diff --git a/tools/clang/test/HLSLFileCheck/pix/DbgValueToDbgDeclare_dynamic_array_index.hlsl b/tools/clang/test/HLSLFileCheck/pix/DbgValueToDbgDeclare_dynamic_array_index.hlsl new file mode 100644 index 0000000000..cba891424a --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/pix/DbgValueToDbgDeclare_dynamic_array_index.hlsl @@ -0,0 +1,27 @@ +// RUN: %dxc -Tcs_6_0 /Od %s | %opt -S -dxil-annotate-with-virtual-regs | %FileCheck %s + +// Check that there is an alloca backing the local array +// CHECK: [[ARRAYNAME:%.*]] = alloca [4 x float] + +// Grab the GEP for the above array's element that we're expecting to store to: +// CHECK: [[ARRAYELEMENTPTR:%.*]] = getelementptr inbounds [4 x float], [4 x float]* [[ARRAYNAME]] + +// Check that the store to the alloca is annotated with pix-alloca-reg-write metadata +// (meaning that the pass accurately noted that the 8.0 is stored to a dynamic array index) +// CHECK: store float 8.000000e+00, float* [[ARRAYELEMENTPTR]] +// CHECK-SAME: !pix-alloca-reg-write + + +RWByteAddressBuffer RawUAV: register(u1); + +[numthreads(1, 1, 1)] +void main() +{ + float local_array[4]; + local_array[RawUAV.Load(0)] = 8; + local_array[RawUAV.Load(1)] = 128; + + RawUAV.Store(64+0,local_array[0]); + RawUAV.Store(64+4,local_array[1]); +} + diff --git a/tools/clang/test/HLSLFileCheck/pix/Debug_dynamic_array_index.hlsl 
b/tools/clang/test/HLSLFileCheck/pix/Debug_dynamic_array_index.hlsl new file mode 100644 index 0000000000..9ab5bce95a --- /dev/null +++ b/tools/clang/test/HLSLFileCheck/pix/Debug_dynamic_array_index.hlsl @@ -0,0 +1,19 @@ +// RUN: %dxc -Tcs_6_0 /Od %s | %opt -S -dxil-annotate-with-virtual-regs -hlsl-dxil-debug-instrumentation,UAVSize=128,upstreamSVPositionRow=2 -hlsl-dxilemit | %FileCheck %s + +// Check that there is a block precis that correctly returns that the array is a 4-value float array +// CHECK: Block#0 +// CHECK-SAME: d,0-4 + +RWByteAddressBuffer RawUAV: register(u1); + +[numthreads(1, 1, 1)] +void main() +{ + float local_array[4]; + local_array[RawUAV.Load(0)] = 8; + local_array[RawUAV.Load(1)] = 128; + + RawUAV.Store(64+0,local_array[0]); + RawUAV.Store(64+4,local_array[1]); +} + diff --git a/tools/clang/test/HLSLFileCheck/shader_targets/raytracing/raytracing_intersection_geometryIndex.hlsl b/tools/clang/test/HLSLFileCheck/shader_targets/raytracing/raytracing_intersection_geometryIndex.hlsl index 12df1ecbcf..98997a52b1 100644 --- a/tools/clang/test/HLSLFileCheck/shader_targets/raytracing/raytracing_intersection_geometryIndex.hlsl +++ b/tools/clang/test/HLSLFileCheck/shader_targets/raytracing/raytracing_intersection_geometryIndex.hlsl @@ -1,10 +1,10 @@ // RUN: %dxc -T lib_6_5 -auto-binding-space 11 %s | FileCheck %s // CHECK: define void [[intersection1:@"\\01\?intersection1@[^\"]+"]]() #0 { -// CHECK: [[rayTCurrent:%[^ ]+]] = call float @dx.op.rayTCurrent.f32(i32 154) -// CHECK: [[GeometryIndex:%[^ ]+]] = call i32 @dx.op.geometryIndex.i32(i32 213) -// CHECK: icmp eq i32 [[GeometryIndex]], 0 -// CHECK: call i1 @dx.op.reportHit.struct.MyAttributes(i32 158, float [[rayTCurrent]], i32 0, %struct.MyAttributes* nonnull {{.*}}) +// CHECK-DAG: [[rayTCurrent:%[^ ]+]] = call float @dx.op.rayTCurrent.f32(i32 154) +// CHECK-DAG: [[GeometryIndex:%[^ ]+]] = call i32 @dx.op.geometryIndex.i32(i32 213) +// CHECK-DAG: icmp eq i32 [[GeometryIndex]], 0 +// CHECK-DAG: call i1 @dx.op.reportHit.struct.MyAttributes(i32 158, float [[rayTCurrent]], i32 0, %struct.MyAttributes* nonnull {{.*}}) // CHECK: ret void struct MyAttributes { diff --git a/tools/clang/test/LitDXILValidation/load-store-validation.ll b/tools/clang/test/LitDXILValidation/load-store-validation.ll index 34b2f6b602..16c64672bd 100644 --- a/tools/clang/test/LitDXILValidation/load-store-validation.ll +++ b/tools/clang/test/LitDXILValidation/load-store-validation.ll @@ -1,3 +1,4 @@ +; REQUIRES: dxil-1-9 ; RUN: not %dxv %s 2>&1 | FileCheck %s ; Ensure proper validation errors are produced for invalid parameters to load and store operations. 
diff --git a/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-failing.ll b/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-failing.ll new file mode 100644 index 0000000000..33591126e5 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-failing.ll @@ -0,0 +1,86 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; Original Source: \tools\clang\test\CodeGenHLSL\linalg\outer-product-accumulate-matrix-layout.hlsl +; The failing cases were generated by manually editing the IR produced for the passing +; case by compiling the HLSL above (Original Source). + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v8f16 = type { <8 x half>, i32 } +%struct.ByteAddressBuffer = type { i32 } +%struct.RWByteAddressBuffer = type { i32 } + +; As noted in other tests, the validation errors come out in +; an order different from the IR, so they are listed here in the +; order they appear, with comments added to the calls below for correlation. + +;CHECK: error: matrix stride must be a constant zero for optimal layouts +;CHECK: error: matrix stride must be a constant zero for optimal layouts +;CHECK-NOT: error: matrix layout value 'OuterProductOptimal' is not valid for outerproductaccumulate, must be 'OuterProductOptimal' +;CHECK: error: matrix layout value 'MulOptimal' is not valid for outerproductaccumulate, must be 'OuterProductOptimal' +;CHECK: error: matrix layout value 'ColumnMajor' is not valid for outerproductaccumulate, must be 'OuterProductOptimal' +;CHECK: error: matrix layout value 'RowMajor' is not valid for outerproductaccumulate, must be 'OuterProductOptimal' +; CHECK: Validation failed. 
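+
+; For reference, derived from the calls and the expected diagnostics below: the
+; matrixLayout operand values exercised are 0 = RowMajor, 1 = ColumnMajor,
+; 2 = MulOptimal, and 3 = OuterProductOptimal, the only layout accepted for this op.
+; For the optimal layouts the matrixStride operand must also be the constant 0, so the
+; two layout-3 calls with strides 64 and 63 trigger the stride error instead.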
+ +define void @main() { + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %2 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %3 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind zeroinitializer, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %4 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 11, i32 0 }) ; AnnotateHandle(res,props) resource: ByteAddressBuffer + %5 = call %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32 303, %dx.types.Handle %4, i32 0, i32 undef, i32 2) ; RawBufferVectorLoad(buf,index,elementOffset,alignment) + %6 = extractvalue %dx.types.ResRet.v8f16 %5, 0 + %7 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 11, i32 0 }) ; AnnotateHandle(res,props) resource: ByteAddressBuffer + %8 = call %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32 303, %dx.types.Handle %7, i32 0, i32 undef, i32 2) ; RawBufferVectorLoad(buf,index,elementOffset,alignment) + %9 = extractvalue %dx.types.ResRet.v8f16 %8, 0 + %10 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + ; error: matrix layout value 'RowMajor' is not valid for outerproductaccumulate, must be 'OuterProductOptimal' + call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 0, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + ; error: matrix layout value 'ColumnMajor' is not valid for outerproductaccumulate, must be 'OuterProductOptimal' + call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 1, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + ; matrix layout value 'MulOptimal' is not valid for outerproductaccumulate, must be 'OuterProductOptimal' + call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 2, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + ; error: matrix stride must be a constant zero for optimal layouts + call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 3, i32 64) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + ; error: matrix stride must be a constant zero for optimal layouts + call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 3, i32 63) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + ret void +} + +; Function Attrs: nounwind readonly +declare %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32, %dx.types.Handle, i32, 
i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.outerProductAccumulate.v8f16.v8f16(i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #2 + +attributes #0 = { nounwind readonly } +attributes #1 = { nounwind } +attributes #2 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.entryPoints = !{!8} + +!0 = !{i32 1, i32 9} +!1 = !{!"cs", i32 6, i32 9} +!2 = !{!3, !6, null, null} +!3 = !{!4, !5} +!4 = !{i32 0, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i32 0, null} +!5 = !{i32 1, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 1, i32 1, i32 11, i32 0, null} +!6 = !{!7} +!7 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!8 = !{void ()* @main, !"main", null, !2, !9} +!9 = !{i32 0, i64 8598323216, i32 4, !10} +!10 = !{i32 1, i32 1, i32 1} diff --git a/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-passing.ll b/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-passing.ll new file mode 100644 index 0000000000..44cd3e48b3 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/outer-product-accumulate-matrix-layout-passing.ll @@ -0,0 +1,65 @@ +; REQUIRES: dxil-1-9 +; RUN: %dxv %s 2>&1 | FileCheck %s + +;Original Source: \tools\clang\test\CodeGenHLSL\linalg\outer-product-accumulate-matrix-layout.hlsl + +target datalayout = "e-m:e-p:32:32-i1:32-i8:8-i16:16-i32:32-i64:64-f16:16-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResBind = type { i32, i32, i32, i8 } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.ResRet.v8f16 = type { <8 x half>, i32 } +%struct.ByteAddressBuffer = type { i32 } +%struct.RWByteAddressBuffer = type { i32 } + +;CHECK: Validation succeeded. 
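+
+; The single call below uses matrixLayout 3 (OuterProductOptimal) with matrixStride 0,
+; the one layout/stride combination that the failing variant of this test shows the
+; validator accepts for OuterProductAccumulate.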
+ +define void @main() { + %1 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 0, i32 0, i32 0, i8 1 }, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %2 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind { i32 1, i32 1, i32 0, i8 0 }, i32 1, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %3 = call %dx.types.Handle @dx.op.createHandleFromBinding(i32 217, %dx.types.ResBind zeroinitializer, i32 0, i1 false) ; CreateHandleFromBinding(bind,index,nonUniformIndex) + %4 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 11, i32 0 }) ; AnnotateHandle(res,props) resource: ByteAddressBuffer + %5 = call %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32 303, %dx.types.Handle %4, i32 0, i32 undef, i32 2) ; RawBufferVectorLoad(buf,index,elementOffset,alignment) + %6 = extractvalue %dx.types.ResRet.v8f16 %5, 0 + %7 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 11, i32 0 }) ; AnnotateHandle(res,props) resource: ByteAddressBuffer + %8 = call %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32 303, %dx.types.Handle %7, i32 0, i32 undef, i32 2) ; RawBufferVectorLoad(buf,index,elementOffset,alignment) + %9 = extractvalue %dx.types.ResRet.v8f16 %8, 0 + %10 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %1, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + call void @dx.op.outerProductAccumulate.v8f16.v8f16(i32 307, <8 x half> %6, <8 x half> %9, %dx.types.Handle %10, i32 0, i32 8, i32 3, i32 0) ; OuterProductAccumulate(inputVector1,inputVector2,matrixBuffer,matrixOffset,matrixIntepretation,matrixLayout,matrixStride) + ret void +} + +; Function Attrs: nounwind readonly +declare %dx.types.ResRet.v8f16 @dx.op.rawBufferVectorLoad.v8f16(i32, %dx.types.Handle, i32, i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.outerProductAccumulate.v8f16.v8f16(i32, <8 x half>, <8 x half>, %dx.types.Handle, i32, i32, i32, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.createHandleFromBinding(i32, %dx.types.ResBind, i32, i1) #2 + +attributes #0 = { nounwind readonly } +attributes #1 = { nounwind } +attributes #2 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.entryPoints = !{!8} + +!0 = !{i32 1, i32 9} +!1 = !{!"cs", i32 6, i32 9} +!2 = !{!3, !6, null, null} +!3 = !{!4, !5} +!4 = !{i32 0, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i32 0, null} +!5 = !{i32 1, %struct.ByteAddressBuffer* undef, !"", i32 0, i32 1, i32 1, i32 11, i32 0, null} +!6 = !{!7} +!7 = !{i32 0, %struct.RWByteAddressBuffer* undef, !"", i32 0, i32 0, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!8 = !{void ()* @main, !"main", null, !2, !9} +!9 = !{i32 0, i64 8598323216, i32 4, !10} +!10 = !{i32 1, i32 1, i32 1} diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_failing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_failing.ll new file mode 100644 index 0000000000..7270996b91 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_failing.ll @@ -0,0 +1,202 @@ +; REQUIRES: 
dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r287_ud = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject undef, i32 undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK: note: at '%r287_ud = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject undef, i32 undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r287 = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject undef, i32 1)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r271 = call i1 @dx.op.hitObject_StateScalar.i1(i32 271, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r270 = call i1 @dx.op.hitObject_StateScalar.i1(i32 270, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r269 = call i1 @dx.op.hitObject_StateScalar.i1(i32 269, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r286 = call i32 @dx.op.hitObject_StateScalar.i32(i32 286, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r285 = call i32 @dx.op.hitObject_StateScalar.i32(i32 285, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r284 = call i32 @dx.op.hitObject_StateScalar.i32(i32 284, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r283 = call i32 @dx.op.hitObject_StateScalar.i32(i32 283, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r282 = call i32 @dx.op.hitObject_StateScalar.i32(i32 282, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r281 = call i32 @dx.op.hitObject_StateScalar.i32(i32 281, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r272 = call i32 @dx.op.hitObject_StateScalar.i32(i32 272, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r288_wrongmul = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject undef, i32 7)' in block '#0' of function '?main@@YAXXZ'. 
+; CHECK: Function: ?main@@YAXXZ: error: parameter 'offset' must be a multiple of 4, got 7 +; CHECK: note: at '%r288_wrongmul = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject undef, i32 7)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r288 = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject undef, i32 42)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: parameter 'offset' must be a multiple of 4, got 42 +; CHECK: note: at '%r288 = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject undef, i32 42)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect component between 0~2, got 3. +; CHECK: note: at '%r278_oobc = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %nop, i32 3)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: component of HitObject_ObjectRayDirection must be an immediate constant. +; CHECK: note: at '%r278_dync = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %nop, i32 %r272)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r278 = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject undef, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect component between 0~2, got 3. +; CHECK: note: at '%r277_oobc = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %nop, i32 3)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: component of HitObject_ObjectRayOrigin must be an immediate constant. +; CHECK: note: at '%r277_dync = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %nop, i32 %r272)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r277 = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject undef, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect component between 0~2, got 3. +; CHECK: note: at '%r276_oobc = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %nop, i32 3)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: component of HitObject_WorldRayDirection must be an immediate constant. +; CHECK: note: at '%r276_dync = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %nop, i32 %r272)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r276 = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject undef, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect component between 0~2, got 3. +; CHECK: note: at '%r275_oobc = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %nop, i32 3)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: component of HitObject_WorldRayOrigin must be an immediate constant. +; CHECK: note: at '%r275_dync = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %nop, i32 %r272)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. 
+; CHECK: note: at '%r275 = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject undef, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r274 = call float @dx.op.hitObject_StateScalar.f32(i32 274, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r273 = call float @dx.op.hitObject_StateScalar.f32(i32 273, %dx.types.HitObject undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect column between 0~3, got 4. +; CHECK: note: at '%r280_oobc = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 0, i32 4)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: column of HitObject_WorldToObject3x4 must be an immediate constant. +; CHECK: note: at '%r280_dync = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 0, i32 %r272)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect row between 0~2, got 3. +; CHECK: note: at '%r280_oobr = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 3, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: row of HitObject_WorldToObject3x4 must be an immediate constant. +; CHECK: note: at '%r280_dynr = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 %r272, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r280 = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject undef, i32 0, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect column between 0~3, got 4. +; CHECK: note: at '%r279_oobc = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 0, i32 4)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: column of HitObject_ObjectToWorld3x4 must be an immediate constant. +; CHECK: note: at '%r279_dync = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 0, i32 %r272)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: expect row between 0~2, got 3. +; CHECK: note: at '%r279_oobr = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 3, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: row of HitObject_ObjectToWorld3x4 must be an immediate constant. +; CHECK: note: at '%r279_dynr = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 %r272, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK: note: at '%r279 = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject undef, i32 0, i32 0)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Validation failed. 
+ +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { +%nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + %r269 = call i1 @dx.op.hitObject_StateScalar.i1(i32 269, %dx.types.HitObject undef) ; HitObject_IsMiss(hitObject) + + %r270 = call i1 @dx.op.hitObject_StateScalar.i1(i32 270, %dx.types.HitObject undef) ; HitObject_IsHit(hitObject) + + %r271 = call i1 @dx.op.hitObject_StateScalar.i1(i32 271, %dx.types.HitObject undef) ; HitObject_IsNop(hitObject) + + %r272 = call i32 @dx.op.hitObject_StateScalar.i32(i32 272, %dx.types.HitObject undef) ; HitObject_RayFlags(hitObject) + + %r273 = call float @dx.op.hitObject_StateScalar.f32(i32 273, %dx.types.HitObject undef) ; HitObject_RayTMin(hitObject) + + %r274 = call float @dx.op.hitObject_StateScalar.f32(i32 274, %dx.types.HitObject undef) ; HitObject_RayTCurrent(hitObject) + + %r275 = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject undef, i32 0) ; HitObject_WorldRayOrigin(hitObject,component) + %r275_dync = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %nop, i32 %r272) ; HitObject_WorldRayOrigin(hitObject,component) + %r275_oobc = call float @dx.op.hitObject_StateVector.f32(i32 275, %dx.types.HitObject %nop, i32 3) ; HitObject_WorldRayOrigin(hitObject,component) + + %r276 = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject undef, i32 0) ; HitObject_WorldRayDirection(hitObject,component) + %r276_dync = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %nop, i32 %r272) ; HitObject_WorldRayDirection(hitObject,component) + %r276_oobc = call float @dx.op.hitObject_StateVector.f32(i32 276, %dx.types.HitObject %nop, i32 3) ; HitObject_WorldRayDirection(hitObject,component) + + %r277 = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject undef, i32 0) ; HitObject_ObjectRayOrigin(hitObject,component) + %r277_dync = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %nop, i32 %r272) ; HitObject_ObjectRayOrigin(hitObject,component) + %r277_oobc = call float @dx.op.hitObject_StateVector.f32(i32 277, %dx.types.HitObject %nop, i32 3) ; HitObject_ObjectRayOrigin(hitObject,component) + + %r278 = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject undef, i32 0) ; HitObject_ObjectRayDirection(hitObject,component) + %r278_dync = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %nop, i32 %r272) ; HitObject_ObjectRayDirection(hitObject,component) + %r278_oobc = call float @dx.op.hitObject_StateVector.f32(i32 278, %dx.types.HitObject %nop, i32 3) ; HitObject_ObjectRayDirection(hitObject,component) + + %r279 = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject undef, i32 0, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) + %r279_dynr = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 %r272, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) + %r279_oobr = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 3, i32 0) ; HitObject_ObjectToWorld3x4(hitObject,row,col) + %r279_dync = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 0, i32 %r272) ; HitObject_ObjectToWorld3x4(hitObject,row,col) + %r279_oobc = call float @dx.op.hitObject_StateMatrix.f32(i32 279, %dx.types.HitObject %nop, i32 0, i32 4) ; HitObject_ObjectToWorld3x4(hitObject,row,col) + + %r280 = call float @dx.op.hitObject_StateMatrix.f32(i32 280, 
%dx.types.HitObject undef, i32 0, i32 0) ; HitObject_WorldToObject3x4(hitObject,row,col) + %r280_dynr = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 %r272, i32 0) ; HitObject_WorldToObject3x4(hitObject,row,col) + %r280_oobr = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 3, i32 0) ; HitObject_WorldToObject3x4(hitObject,row,col) + %r280_dync = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 0, i32 %r272) ; HitObject_WorldToObject3x4(hitObject,row,col) + %r280_oobc = call float @dx.op.hitObject_StateMatrix.f32(i32 280, %dx.types.HitObject %nop, i32 0, i32 4) ; HitObject_WorldToObject3x4(hitObject,row,col) + + %r281 = call i32 @dx.op.hitObject_StateScalar.i32(i32 281, %dx.types.HitObject undef) ; HitObject_GeometryIndex(hitObject) + + %r282 = call i32 @dx.op.hitObject_StateScalar.i32(i32 282, %dx.types.HitObject undef) ; HitObject_InstanceIndex(hitObject) + + %r283 = call i32 @dx.op.hitObject_StateScalar.i32(i32 283, %dx.types.HitObject undef) ; HitObject_InstanceID(hitObject) + + %r284 = call i32 @dx.op.hitObject_StateScalar.i32(i32 284, %dx.types.HitObject undef) ; HitObject_PrimitiveIndex(hitObject) + + %r285 = call i32 @dx.op.hitObject_StateScalar.i32(i32 285, %dx.types.HitObject undef) ; HitObject_HitKind(hitObject) + + %r286 = call i32 @dx.op.hitObject_StateScalar.i32(i32 286, %dx.types.HitObject undef) ; HitObject_ShaderTableIndex(hitObject) + + %r287 = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject undef, i32 1) ; HitObject_SetShaderTableIndex(hitObject,shaderTableIndex) + %r287_ud = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject undef, i32 undef) ; HitObject_SetShaderTableIndex(hitObject,shaderTableIndex) + + %r288 = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject undef, i32 42) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) + %r288_wrongmul = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject undef, i32 7) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) + + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32, %dx.types.HitObject, i32) #1 + +; Function Attrs: nounwind readnone +declare i1 @dx.op.hitObject_StateScalar.i1(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readnone +declare i32 @dx.op.hitObject_StateScalar.i32(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readonly +declare i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32, %dx.types.HitObject, i32) #2 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateVector.f32(i32, %dx.types.HitObject, i32) #1 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateScalar.f32(i32, %dx.types.HitObject) #1 + +; Function Attrs: nounwind readnone +declare float @dx.op.hitObject_StateMatrix.f32(i32, %dx.types.HitObject, i32, i32) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } +attributes #3 = { nounwind argmemonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!3, !4} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !5} +!3 = !{null, !"", null, 
null, !6} +!4 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !7} +!5 = !{!8} +!6 = !{i32 0, i64 0} +!7 = !{i32 8, i32 7, i32 5, !9} +!8 = !{i32 1, !10, !10} +!9 = !{i32 0} +!10 = !{} diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll index e527125009..74cc94fb78 100644 --- a/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_accessors_passing.ll @@ -52,7 +52,7 @@ define void @"\01?main@@YAXXZ"() #0 { %r287 = call %dx.types.HitObject @dx.op.hitObject_SetShaderTableIndex(i32 287, %dx.types.HitObject %nop, i32 1) ; HitObject_SetShaderTableIndex(hitObject,shaderTableIndex) - %r288 = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject %nop, i32 42) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) + %r288 = call i32 @dx.op.hitObject_LoadLocalRootTableConstant(i32 288, %dx.types.HitObject %nop, i32 16) ; HitObject_LoadLocalRootTableConstant(hitObject,offset) call void @dx.op.hitObject_Attributes.struct.AttribType(i32 289, %dx.types.HitObject %nop, %struct.AttribType* nonnull %attrs) ; HitObject_Attributes(hitObject,attributes) ret void diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_failing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_failing.ll new file mode 100644 index 0000000000..602ff99a55 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_fromrayquery_failing.ll @@ -0,0 +1,99 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; CHECK: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%attrsud3 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %rq, i32 16, %struct.CustomAttrs* nonnull undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%attrsud2 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %rq, i32 undef, %struct.CustomAttrs* nonnull %attra)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%attrsud1 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 undef, i32 16, %struct.CustomAttrs* nonnull %attra)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%ud1 = call %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32 263, i32 undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Validation failed. 
+ +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%struct.Payload = type { <3 x float> } +%struct.CustomAttrs = type { float, float } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.HitObject = type { i8* } +%struct.RaytracingAccelerationStructure = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %ldh = load %dx.types.Handle, %dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4 + %attra = alloca %struct.CustomAttrs, align 4 + %rq = call i32 @dx.op.allocateRayQuery(i32 178, i32 5) ; AllocateRayQuery(constRayFlags) + %createh = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %ldh) ; CreateHandleForLib(Resource) + %annoth = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %createh, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure + call void @dx.op.rayQuery_TraceRayInline(i32 179, i32 %rq, %dx.types.Handle %annoth, i32 0, i32 255, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) ; RayQuery_TraceRayInline(rayQueryHandle,accelerationStructure,rayFlags,instanceInclusionMask,origin_X,origin_Y,origin_Z,tMin,direction_X,direction_Y,direction_Z,tMax) + + %ok = call %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32 263, i32 %rq) ; HitObject_FromRayQuery(rayQueryHandle) + %ud1 = call %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32 263, i32 undef) ; HitObject_FromRayQuery(rayQueryHandle) + + %attrsok = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %rq, i32 16, %struct.CustomAttrs* nonnull %attra) ; HitObject_FromRayQueryWithAttrs(rayQueryHandle,HitKind,CommittedAttribs) + %attrsud1 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 undef, i32 16, %struct.CustomAttrs* nonnull %attra) ; HitObject_FromRayQueryWithAttrs(rayQueryHandle,HitKind,CommittedAttribs) + %attrsud2 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %rq, i32 undef, %struct.CustomAttrs* nonnull %attra) ; HitObject_FromRayQueryWithAttrs(rayQueryHandle,HitKind,CommittedAttribs) + %attrsud3 = call %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32 264, i32 %rq, i32 16, %struct.CustomAttrs* nonnull undef) ; HitObject_FromRayQueryWithAttrs(rayQueryHandle,HitKind,CommittedAttribs) + + ret void +} + +; Function Attrs: nounwind +declare i32 @dx.op.allocateRayQuery(i32, i32) #0 + +; Function Attrs: nounwind +declare void @dx.op.rayQuery_TraceRayInline(i32, i32, %dx.types.Handle, i32, i32, float, float, float, float, float, float, float, float) #0 + +; Function Attrs: nounwind readonly +declare %dx.types.HitObject @dx.op.hitObject_FromRayQueryWithAttrs.struct.CustomAttrs(i32, i32, i32, %struct.CustomAttrs*) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.HitObject @dx.op.hitObject_FromRayQuery(i32, i32) #1 + +; Function Attrs: nounwind readnone +declare 
%dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readonly } +attributes #2 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!6} +!dx.dxrPayloadAnnotations = !{!10} +!dx.entryPoints = !{!13, !15} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{!3, null, null, null} +!3 = !{!4} +!4 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !5} +!5 = !{i32 0, i32 4} +!6 = !{i32 1, void ()* @"\01?main@@YAXXZ", !7} +!7 = !{!8} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{i32 0, %struct.Payload undef, !11} +!11 = !{!12} +!12 = !{i32 0, i32 8210} +!13 = !{null, !"", null, !2, !14} +!14 = !{i32 0, i64 33554432} +!15 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !16} +!16 = !{i32 8, i32 7, i32 5, !17} +!17 = !{i32 0} diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_invoke_failing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_invoke_failing.ll new file mode 100644 index 0000000000..a6bdd49f72 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_invoke_failing.ll @@ -0,0 +1,58 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%struct.Payload = type { <3 x float> } +%dx.types.HitObject = type { i8* } + +; CHECK: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at 'call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %nop, %struct.Payload* nonnull undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: HitObject is undef. +; CHECK-NEXT: note: at 'call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject undef, %struct.Payload* nonnull %pld)' in block '#0' of function '?main@@YAXXZ'. + +; CHECK-NEXT: Validation failed. 
+ +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %pld = alloca %struct.Payload, align 4 + %nop = call %dx.types.HitObject @dx.op.hitObject_MakeNop(i32 266) ; HitObject_MakeNop() + call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %nop, %struct.Payload* nonnull %pld) ; HitObject_Invoke(hitObject,payload) + call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject undef, %struct.Payload* nonnull %pld) ; HitObject_Invoke(hitObject,payload) + call void @dx.op.hitObject_Invoke.struct.Payload(i32 267, %dx.types.HitObject %nop, %struct.Payload* nonnull undef) ; HitObject_Invoke(hitObject,payload) + + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeNop(i32) #1 + +; Function Attrs: nounwind +declare void @dx.op.hitObject_Invoke.struct.Payload(i32, %dx.types.HitObject, %struct.Payload*) #0 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.dxrPayloadAnnotations = !{!3} +!dx.entryPoints = !{!4, !6} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !7} +!3 = !{i32 0, %struct.Payload undef, !8} +!4 = !{null, !"", null, null, !5} +!5 = !{i32 0, i64 0} +!6 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !9} +!7 = !{!10} +!8 = !{!11} +!9 = !{i32 8, i32 7, i32 5, !12} +!10 = !{i32 1, !13, !13} +!11 = !{i32 0, i32 8210} +!12 = !{i32 0} +!13 = !{} diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_make_failing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_make_failing.ll new file mode 100644 index 0000000000..b47f178ca2 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_make_failing.ll @@ -0,0 +1,44 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.HitObject = type { i8* } + +; CHECK: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK: note: at '%r265_udmiss = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 4, i32 undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK: note: at '%r265_udflags = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 undef, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03)' in block '#0' of function '?main@@YAXXZ'. +; CHECK: Validation failed. 
+ +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %r265_udflags = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 undef, i32 0, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) + %r265_udmiss = call %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32 265, i32 4, i32 undef, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 0.000000e+00, float 1.000000e+00, float 0.000000e+00, float 0.000000e+00, float 9.999000e+03) ; HitObject_MakeMiss(RayFlags,MissShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax) + ret void +} + +; Function Attrs: nounwind readnone +declare %dx.types.HitObject @dx.op.hitObject_MakeMiss(i32, i32, i32, float, float, float, float, float, float, float, float) #1 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.typeAnnotations = !{!2} +!dx.entryPoints = !{!9, !11} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{i32 1, void ()* @"\01?main@@YAXXZ", !3} +!3 = !{!4} +!4 = !{i32 1, !5, !5} +!5 = !{} +!9 = !{null, !"", null, null, !10} +!10 = !{i32 0, i64 0} +!11 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !12} +!12 = !{i32 8, i32 7, i32 5, !13} +!13 = !{i32 0} diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_trace_failing.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_trace_failing.ll new file mode 100644 index 0000000000..eb0d2576b0 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_trace_failing.ll @@ -0,0 +1,114 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%struct.Payload = type { <3 x float> } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.HitObject = type { i8* } +%struct.RaytracingAccelerationStructure = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 + +; CHECK: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud16 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* undef)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud15 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float undef, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. 
+; CHECK-NEXT: note: at '%tud14 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float undef, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud13 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float undef, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud12 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud11 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float undef, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud10 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float undef, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud9 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float undef, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud8 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. 
+; CHECK-NEXT: note: at '%tud7 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 undef, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud6 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 undef, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud5 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 undef, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud4 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 undef, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud3 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 undef, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Instructions should not read uninitialized value. +; CHECK-NEXT: note: at '%tud2 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle undef, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: TraceRay should only use RTAccelerationStructure. +; CHECK-NEXT: note: at '%tud2 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle undef, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2)' in block '#0' of function '?main@@YAXXZ'. + +; CHECK-NEXT: Validation failed. 
+ +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4 + %2 = alloca %struct.Payload, align 4 + %3 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %4 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %3, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure + + %tok = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud2 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle undef, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud3 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 undef, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud4 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 undef, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud5 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 undef, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud6 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 
262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 undef, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud7 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 undef, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud8 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float undef, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud9 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float undef, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud10 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float undef, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud11 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float undef, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud12 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, 
float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float undef, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud13 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float undef, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud14 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float undef, float 7.000000e+00, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud15 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float undef, %struct.Payload* nonnull %2) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + %tud16 = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %4, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* undef) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + + ret void +} + +; Function Attrs: nounwind +declare %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!3} +!dx.dxrPayloadAnnotations = !{!4} +!dx.entryPoints = !{!5, 
!6} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{!7, null, null, null} +!3 = !{i32 1, void ()* @"\01?main@@YAXXZ", !8} +!4 = !{i32 0, %struct.Payload undef, !9} +!5 = !{null, !"", null, !2, null} +!6 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !10} +!7 = !{!11} +!8 = !{!12} +!9 = !{!13} +!10 = !{i32 8, i32 7, i32 5, !14} +!11 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !15} +!12 = !{i32 1, !16, !16} +!13 = !{i32 0, i32 8210} +!14 = !{i32 0} +!15 = !{i32 0, i32 4} +!16 = !{} diff --git a/tools/clang/test/LitDXILValidation/ser_hitobject_trace_invaliduav.ll b/tools/clang/test/LitDXILValidation/ser_hitobject_trace_invaliduav.ll new file mode 100644 index 0000000000..c4f3a918f8 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_hitobject_trace_invaliduav.ll @@ -0,0 +1,108 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; shader hash: b22988e7874179601860019e56fb877e +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; RTAS texture i32 ras T0t4294967295,space4294967295 1 +; nonas_buf UAV byte r/w U0u4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%struct.Payload = type { <3 x float> } +%dx.types.ResourceProperties = type { i32, i32 } +%dx.types.HitObject = type { i8* } +%struct.RaytracingAccelerationStructure = type { i32 } +%struct.RWByteAddressBuffer = type { i32 } + +@"\01?RTAS@@3URaytracingAccelerationStructure@@A" = external constant %dx.types.Handle, align 4 +@"\01?nonas_buf@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 + +; CHECK: Function: ?main@@YAXXZ: error: TraceRay should only use RTAccelerationStructure. +; CHECK-NEXT: note: at '%invalid = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %7, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %3)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Validation failed. 
+ +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A", align 4 + %2 = load %dx.types.Handle, %dx.types.Handle* @"\01?nonas_buf@@3URWByteAddressBuffer@@A", align 4 + %3 = alloca %struct.Payload, align 4 + %4 = bitcast %struct.Payload* %3 to i8* + call void @llvm.lifetime.start(i64 12, i8* %4) #0 + %5 = getelementptr inbounds %struct.Payload, %struct.Payload* %3, i32 0, i32 0 + store <3 x float> , <3 x float>* %5, align 4, !tbaa !20 + %6 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %2) ; CreateHandleForLib(Resource) + %7 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %6, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %7, i32 0, i32 undef, float 1.100000e+01, float undef, float undef, float undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + %8 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %9 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %8, %dx.types.ResourceProperties { i32 16, i32 0 }) ; AnnotateHandle(res,props) resource: RTAccelerationStructure + + %valid = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %9, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %3) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + + %invalid = call %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32 262, %dx.types.Handle %7, i32 513, i32 1, i32 2, i32 4, i32 0, float 0.000000e+00, float 1.000000e+00, float 2.000000e+00, float 3.000000e+00, float 4.000000e+00, float 5.000000e+00, float 6.000000e+00, float 7.000000e+00, %struct.Payload* nonnull %3) ; HitObject_TraceRay(accelerationStructure,rayFlags,instanceInclusionMask,rayContributionToHitGroupIndex,multiplierForGeometryContributionToHitGroupIndex,missShaderIndex,Origin_X,Origin_Y,Origin_Z,TMin,Direction_X,Direction_Y,Direction_Z,TMax,payload) + + call void @llvm.lifetime.end(i64 12, i8* %4) #0 + ret void +} + +; Function Attrs: nounwind +declare void @llvm.lifetime.start(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @llvm.lifetime.end(i64, i8* nocapture) #0 + +; Function Attrs: nounwind +declare void @dx.op.rawBufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8, i32) #0 + +; Function Attrs: nounwind +declare %dx.types.HitObject @dx.op.hitObject_TraceRay.struct.Payload(i32, %dx.types.Handle, i32, i32, i32, i32, i32, float, float, float, float, float, float, float, float, %struct.Payload*) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind 
readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!8} +!dx.dxrPayloadAnnotations = !{!12} +!dx.entryPoints = !{!15, !17} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{!3, !6, null, null} +!3 = !{!4} +!4 = !{i32 0, %struct.RaytracingAccelerationStructure* bitcast (%dx.types.Handle* @"\01?RTAS@@3URaytracingAccelerationStructure@@A" to %struct.RaytracingAccelerationStructure*), !"RTAS", i32 -1, i32 -1, i32 1, i32 16, i32 0, !5} +!5 = !{i32 0, i32 4} +!6 = !{!7} +!7 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?nonas_buf@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"nonas_buf", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!8 = !{i32 1, void ()* @"\01?main@@YAXXZ", !9} +!9 = !{!10} +!10 = !{i32 1, !11, !11} +!11 = !{} +!12 = !{i32 0, %struct.Payload undef, !13} +!13 = !{!14} +!14 = !{i32 0, i32 8210} +!15 = !{null, !"", null, !2, !16} +!16 = !{i32 0, i64 8589934608} +!17 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !18} +!18 = !{i32 8, i32 7, i32 5, !19} +!19 = !{i32 0} +!20 = !{!21, !21, i64 0} +!21 = !{!"omnipotent char", !22, i64 0} +!22 = !{!"Simple C/C++ TBAA"} diff --git a/tools/clang/test/LitDXILValidation/ser_reorder_scope_sm68_failing.ll b/tools/clang/test/LitDXILValidation/ser_reorder_scope_sm68_failing.ll new file mode 100644 index 0000000000..cd93eca793 --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_reorder_scope_sm68_failing.ll @@ -0,0 +1,77 @@ +; REQUIRES: dxil-1-8 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; BAB UAV byte r/w U0 u1 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +@"\01?BAB@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 + +; CHECK: Function: ?main@@YAXXZ: error: Invalid semantic flags on DXIL operation 'BarrierByMemoryType' +; CHECK-NEXT: note: at 'call void @dx.op.barrierByMemoryType(i32 244, i32 1, i32 8)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Invalid semantic flags on DXIL operation 'barrierByMemoryHandle' +; CHECK-NEXT: note: at 'call void @dx.op.barrierByMemoryHandle(i32 245, %dx.types.Handle %3, i32 8)' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Entry function performs some operation that is incompatible with the shader stage or other entry properties. See other errors for details. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: Function uses features incompatible with the shader model. +; CHECK-NEXT: Validation failed. 
+ +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?BAB@@3URWByteAddressBuffer@@A", align 4 + call void @dx.op.barrierByMemoryType(i32 244, i32 1, i32 8) ; BarrierByMemoryType(MemoryTypeFlags,SemanticFlags) + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 4107, i32 0 }) ; AnnotateHandle(res,props) resource: RWByteAddressBuffer + call void @dx.op.barrierByMemoryHandle(i32 245, %dx.types.Handle %3, i32 8) ; BarrierByMemoryHandle(object,SemanticFlags) + ret void +} + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryType(i32, i32, i32) #1 + +; Function Attrs: noduplicate nounwind +declare void @dx.op.barrierByMemoryHandle(i32, %dx.types.Handle, i32) #1 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #2 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #3 + +attributes #0 = { nounwind } +attributes #1 = { noduplicate nounwind } +attributes #2 = { nounwind readnone } +attributes #3 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!5} +!dx.entryPoints = !{!9, !11} + +!0 = !{i32 1, i32 8} +!1 = !{!"lib", i32 6, i32 8} +!2 = !{null, !3, null, null} +!3 = !{!4} +!4 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?BAB@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"BAB", i32 0, i32 1, i32 1, i32 11, i1 false, i1 false, i1 false, null} +!5 = !{i32 1, void ()* @"\01?main@@YAXXZ", !6} +!6 = !{!7} +!7 = !{i32 1, !8, !8} +!8 = !{} +!9 = !{null, !"", null, !2, !10} +!10 = !{i32 0, i64 8589934608} +!11 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !12} +!12 = !{i32 8, i32 7, i32 5, !13} +!13 = !{i32 0} diff --git a/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll b/tools/clang/test/LitDXILValidation/ser_reorder_scope_sm69_passing.ll similarity index 96% rename from tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll rename to tools/clang/test/LitDXILValidation/ser_reorder_scope_sm69_passing.ll index cab9942b02..fa2733ef22 100644 --- a/tools/clang/test/HLSLFileCheck/validation/ser_reorder_scope_sm69_passing.ll +++ b/tools/clang/test/LitDXILValidation/ser_reorder_scope_sm69_passing.ll @@ -1,4 +1,7 @@ -; RUN: %dxilver 1.9 | %dxv %s +; REQUIRES: dxil-1-9 +; RUN: %dxv %s 2>&1 | FileCheck %s + +; CHECK: Validation succeeded. 
; Buffer Definitions: ; diff --git a/tools/clang/test/LitDXILValidation/ser_reordercoherent_invalid_incdec.ll b/tools/clang/test/LitDXILValidation/ser_reordercoherent_invalid_incdec.ll new file mode 100644 index 0000000000..1f68a9a95f --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_reordercoherent_invalid_incdec.ll @@ -0,0 +1,92 @@ +; REQUIRES: dxil-1-9 +; RUN: not %dxv %s 2>&1 | FileCheck %s + +; COM: Original HLSL source: +; COM: reordercoherent RWStructuredBuffer buffer; +; COM: +; COM: +; COM: [Shader("raygeneration")] +; COM: void +; COM: main() +; COM: { +; COM: buffer.IncrementCounter(); +; COM: buffer.DecrementCounter(); +; COM: } + +; CHECK: error: reordercoherent cannot be used on buffer with counter 'buffer' +; CHECK-NEXT: Validation failed. + +; shader hash: 638950814a9023bf537d61dbb330a4c8 +; +; Buffer Definitions: +; +; Resource bind info for buffer +; { +; +; float $Element; ; Offset: 0 Size: 4 +; +; } +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; buffer UAV struct r/w+cnt U0u4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%"class.RWStructuredBuffer" = type { float } + +@"\01?buffer@@3V?$RWStructuredBuffer@M@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?buffer@@3V?$RWStructuredBuffer@M@@A", align 4 + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 102412, i32 4 }) ; AnnotateHandle(res,props) resource: reordercoherent RWStructuredBuffer + %4 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %3, i8 1) ; BufferUpdateCounter(uav,inc) + %5 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %6 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %5, %dx.types.ResourceProperties { i32 102412, i32 4 }) ; AnnotateHandle(res,props) resource: reordercoherent RWStructuredBuffer + %7 = call i32 @dx.op.bufferUpdateCounter(i32 70, %dx.types.Handle %6, i8 -1) ; BufferUpdateCounter(uav,inc) + ret void +} + +; Function Attrs: nounwind +declare i32 @dx.op.bufferUpdateCounter(i32, %dx.types.Handle, i8) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = !{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!6} +!dx.entryPoints = !{!10, !12} + +!0 = !{i32 1, i32 9} +!1 = !{!"lib", i32 6, i32 9} +!2 = !{null, !3, null, null} +!3 = !{!4} +!4 = !{i32 0, %"class.RWStructuredBuffer"* bitcast (%dx.types.Handle* @"\01?buffer@@3V?$RWStructuredBuffer@M@@A" to %"class.RWStructuredBuffer"*), !"buffer", i32 -1, i32 -1, i32 1, i32 12, i1 false, i1 true, i1 
false, !5} +!5 = !{i32 1, i32 4, i32 4, i1 true} +!6 = !{i32 1, void ()* @"\01?main@@YAXXZ", !7} +!7 = !{!8} +!8 = !{i32 1, !9, !9} +!9 = !{} +!10 = !{null, !"", null, !2, !11} +!11 = !{i32 0, i64 8589934608} +!12 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !13} +!13 = !{i32 8, i32 7, i32 5, !14} +!14 = !{i32 0} \ No newline at end of file diff --git a/tools/clang/test/LitDXILValidation/ser_reordercoherent_invalid_sm.ll b/tools/clang/test/LitDXILValidation/ser_reordercoherent_invalid_sm.ll new file mode 100644 index 0000000000..efcb7d3c2b --- /dev/null +++ b/tools/clang/test/LitDXILValidation/ser_reordercoherent_invalid_sm.ll @@ -0,0 +1,83 @@ +; REQUIRES: dxil-1-8 +; RUN: not %dxv %s 2>&1 | FileCheck %s + + +; CHECK: error: reordercoherent requires SM 6.9 or later. 'buf' +; CHECK-NEXT: Function: ?main@@YAXXZ: error: reordercoherent requires SM 6.9 or later. +; CHECK-NEXT: note: at '%3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 69643, i32 0 })' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Function: ?main@@YAXXZ: error: reordercoherent requires SM 6.9 or later. +; CHECK-NEXT: note: at '%3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 69643, i32 0 })' in block '#0' of function '?main@@YAXXZ'. +; CHECK-NEXT: Validation failed. +; COM: Original HLSL source: +; COM: reordercoherent RWByteAddressBuffer buf; +; COM: +; COM: [Shader("raygeneration")] +; COM: void main() +; COM: { +; COM: buf.Store(0, 11.f); +; COM: } + +; shader hash: f7be6354830d1423764991adcfc26b0b +; +; Buffer Definitions: +; +; +; Resource Bindings: +; +; Name Type Format Dim ID HLSL Bind Count +; ------------------------------ ---------- ------- ----------- ------- -------------- ------ +; buf UAV byte r/w U0u4294967295,space4294967295 1 +; +target datalayout = "e-m:e-p:32:32-i1:32-i8:32-i16:32-i32:32-i64:64-f16:32-f32:32-f64:64-n8:16:32:64" +target triple = "dxil-ms-dx" + +%dx.types.Handle = type { i8* } +%dx.types.ResourceProperties = type { i32, i32 } +%struct.RWByteAddressBuffer = type { i32 } + +@"\01?buf@@3URWByteAddressBuffer@@A" = external constant %dx.types.Handle, align 4 + +; Function Attrs: nounwind +define void @"\01?main@@YAXXZ"() #0 { + %1 = load %dx.types.Handle, %dx.types.Handle* @"\01?buf@@3URWByteAddressBuffer@@A", align 4 + %2 = call %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32 160, %dx.types.Handle %1) ; CreateHandleForLib(Resource) + %3 = call %dx.types.Handle @dx.op.annotateHandle(i32 216, %dx.types.Handle %2, %dx.types.ResourceProperties { i32 69643, i32 0 }) ; AnnotateHandle(res,props) resource: reordercoherent RWByteAddressBuffer + call void @dx.op.rawBufferStore.f32(i32 140, %dx.types.Handle %3, i32 0, i32 undef, float 1.100000e+01, float undef, float undef, float undef, i8 1, i32 4) ; RawBufferStore(uav,index,elementOffset,value0,value1,value2,value3,mask,alignment) + ret void +} + +; Function Attrs: nounwind +declare void @dx.op.rawBufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8, i32) #0 + +; Function Attrs: nounwind readnone +declare %dx.types.Handle @dx.op.annotateHandle(i32, %dx.types.Handle, %dx.types.ResourceProperties) #1 + +; Function Attrs: nounwind readonly +declare %dx.types.Handle @dx.op.createHandleForLib.dx.types.Handle(i32, %dx.types.Handle) #2 + +attributes #0 = { nounwind } +attributes #1 = { nounwind readnone } +attributes #2 = { nounwind readonly } + +!dx.version = 
!{!0} +!dx.valver = !{!0} +!dx.shaderModel = !{!1} +!dx.resources = !{!2} +!dx.typeAnnotations = !{!3} +!dx.entryPoints = !{!4, !5} + +!0 = !{i32 1, i32 8} +!1 = !{!"lib", i32 6, i32 8} +!2 = !{null, !6, null, null} +!3 = !{i32 1, void ()* @"\01?main@@YAXXZ", !7} +!4 = !{null, !"", null, !2, !8} +!5 = !{void ()* @"\01?main@@YAXXZ", !"\01?main@@YAXXZ", null, null, !9} +!6 = !{!10} +!7 = !{!11} +!8 = !{i32 0, i64 8589934608} +!9 = !{i32 8, i32 7, i32 5, !12} +!10 = !{i32 0, %struct.RWByteAddressBuffer* bitcast (%dx.types.Handle* @"\01?buf@@3URWByteAddressBuffer@@A" to %struct.RWByteAddressBuffer*), !"buf", i32 -1, i32 -1, i32 1, i32 11, i1 false, i1 false, i1 false, !13} +!11 = !{i32 1, !14, !14} +!12 = !{i32 0} +!13 = !{i32 4, i1 true} +!14 = !{} diff --git a/tools/clang/test/LitDXILValidation/vector-validation.ll b/tools/clang/test/LitDXILValidation/vector-validation.ll index 74e8116e88..b32ac0cd5c 100644 --- a/tools/clang/test/LitDXILValidation/vector-validation.ll +++ b/tools/clang/test/LitDXILValidation/vector-validation.ll @@ -1,3 +1,4 @@ +; REQUIRES: dxil-1-9 ; RUN: not %dxv %s 2>&1 | FileCheck %s ; Confirm that 6.9 specific LLVM operations and DXIL intrinsics fail in 6.8 diff --git a/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl b/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl deleted file mode 100644 index ece7e3f2f4..0000000000 --- a/tools/clang/test/SemaHLSL/attributes/spv.inline.decorate.member.hlsl +++ /dev/null @@ -1,13 +0,0 @@ -// REQUIRES: spirv -// RUN: %dxc -T ps_6_0 -E main -verify -spirv %s - -struct S -{ - [[vk::ext_decorate_id(/*offset*/ 35, 0)]] float4 f1; /* expected-error{{'ext_decorate_id' attribute only applies to functions, variables, parameters, and types}} */ - [[vk::ext_decorate_string(/*offset*/ 35, "16")]] float4 f2; /* expected-error{{'ext_decorate_string' attribute only applies to functions, variables, parameters, and types}} */ -}; - -float4 main() : SV_TARGET -{ - -} diff --git a/tools/clang/test/SemaHLSL/effects-syntax.hlsl b/tools/clang/test/SemaHLSL/effects-syntax.hlsl index 5a7492a9da..e5468cbd41 100644 --- a/tools/clang/test/SemaHLSL/effects-syntax.hlsl +++ b/tools/clang/test/SemaHLSL/effects-syntax.hlsl @@ -108,12 +108,10 @@ static const PixelShader ps1 { state=foo; }; /* expected-warning /*verify-ast No matching AST found for line! */ -// expected-note@? {{'PixelShader' declared here}} PixelShadeR ps < int foo=1;> = ps1; // Case insensitive! /* expected-error {{unknown type name 'PixelShadeR'; did you mean 'PixelShader'?}} expected-warning {{effect object ignored - effect syntax is deprecated}} expected-warning {{possible effect annotation ignored - effect syntax is deprecated}} fxc-pass {{}} */ /*verify-ast No matching AST found for line! */ -// expected-note@? {{'VertexShader' declared here}} VertexShadeR vs; // Case insensitive! 
/* expected-error {{unknown type name 'VertexShadeR'; did you mean 'VertexShader'?}} expected-warning {{effect object ignored - effect syntax is deprecated}} fxc-pass {{}} */ // Case sensitive diff --git a/tools/clang/test/SemaHLSL/enum_sizeof.hlsl b/tools/clang/test/SemaHLSL/enum_sizeof.hlsl new file mode 100644 index 0000000000..71723976a9 --- /dev/null +++ b/tools/clang/test/SemaHLSL/enum_sizeof.hlsl @@ -0,0 +1,31 @@ +// RUN: %dxc -T cs_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST + +enum E1 : uint64_t +{ + v1 = 0, +}; + +enum E2 : uint32_t +{ + v2 = 0, +}; + +struct S { + E1 e1; + E2 e2; +}; + +RWBuffer b; + +[numthreads(128, 1, 1)] +void main() +{ +// AST: UnaryExprOrTypeTraitExpr {{.*}} 'unsigned long' sizeof 'E1' + b[0] = sizeof(E1); + +// AST: UnaryExprOrTypeTraitExpr {{.*}} 'unsigned long' sizeof 'E2' + b[1] = sizeof(E2); + +// AST: UnaryExprOrTypeTraitExpr {{.*}} 'unsigned long' sizeof 'S' + b[2] = sizeof(S); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_add_invalid.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_add_invalid.hlsl new file mode 100644 index 0000000000..866fad8225 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_add_invalid.hlsl @@ -0,0 +1,1398 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s -verify + +#include + +using namespace dx::linalg; + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer matrix_buffer; +ByteAddressBuffer bias_buffer; +RWByteAddressBuffer output_vector_buffer; +ByteAddressBuffer constants_buffer; + +// Output vector, isUnsigned mismatch +void test_invalid_output_vector_type() { + + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector output_vector_0; + const uint is_output_unsigned_0 = 0; + + // expected-error@+1 {{IsOuputUnsigned must be true for vector of unsigned integer type}} + __builtin_MatVecMulAdd(output_vector_0, is_output_unsigned_0, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector output_vector_1; + const uint is_output_unsigned_1 = 1; + + // expected-error@+1 {{IsOuputUnsigned must be false for vector of signed integer type}} + __builtin_MatVecMulAdd(output_vector_1, is_output_unsigned_1, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector output_vector_2; + const uint is_output_unsigned_2 = 1; + + // expected-error@+1 {{IsOuputUnsigned must be false for vector of floating point type}} + __builtin_MatVecMulAdd(output_vector_2, is_output_unsigned_2, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + 
matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// IsOutputUnsigned is not a constant parameter +void test_invalid_is_output_unsigned_non_const() { + + vector output_vector_0; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint is_output_unsigned_0 = constants_buffer.Load(0); + + // expected-error@+1 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector_0, is_output_unsigned_0, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Input vector is incorrect type - 64 bit types +void test_invalid_input_vector_type() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 0; + +// expected-error@+2 {{no matching function for call to '__builtin_MatVecMulAdd'}} +// expected-note@+1 {{candidate function not viable: no known conversion from 'vector' to 'vector' for 3rd argument}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + +// expected-error@+2 {{no matching function for call to '__builtin_MatVecMulAdd'}} +// expected-note@+1 {{candidate function not viable: no known conversion from 'vector' to 'vector' for 3rd argument}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_2 = 0; + +// expected-error@+2 {{no matching function for call to '__builtin_MatVecMulAdd'}} +// expected-note@+1 {{candidate function not viable: no known conversion from 'vector' to 'vector' for 3rd argument}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned_2, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, 
matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Input vector is incorrect type for packed InputInterpretation +void test_invalid_input_vector_type_packed_input_interpretation() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint input_interpretation_0 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 1; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 0; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_2 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_2 = 1; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned_2, input_interpretation_2, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_3 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_3 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_3 = 0; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_3, + is_input_unsigned_3, input_interpretation_3, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_4 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_4 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_4 = 0; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_4, + is_input_unsigned_4, input_interpretation_4, matrix_buffer, + 
matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// IsInputUnsigned must be true for packed input vector type +void test_invalid_is_input_unsigned_packed_input_vector_type() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 0; + + // expected-error@+2 {{IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 0; + + // expected-error@+2 {{IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check packed input vector dimension +void test_invalid_packed_input_vector_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_UINT8; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL; + const bool matrix_is_transposed = false; + const uint matrix_stride = 0; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_UINT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_0 = 4; + + // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_1 = 7; + + // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}} + 
__builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_2 = 7; + + // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_2, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + +} + +// Check is Input vector type/isInputUnsigned matched +void test_invalid_input_vector_type_mismatch() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 0; + + // expected-error@+2 {{IsInputUnsigned must be true for vector of unsigned integer type}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + + // expected-error@+2 {{IsInputUnsigned must be false for vector of signed integer type}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_2 = 1; + + // expected-error@+2 {{IsInputUnsigned must be false for vector of floating point type}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned_2, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check is Matrix M dimension is a constant parameter +void test_invalid_matrix_M_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool 
matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_dimM = constants_buffer.Load(0); + + // expected-error@+3 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check is Matrix K dimension is a constant parameter +void test_invalid_matrix_K_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_dimK = constants_buffer.Load(0); + + // expected-error@+4 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check is Matrix M dimension is non-zero +void test_invalid_matrix_M_dimension_non_zero() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_dimM = 0; + // expected-error@+3 {{matrix dimension must be greater than 0}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check is Matrix K dimension is non-zero +void test_invalid_matrix_K_dimension_non_zero() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_dimK = 0; + // expected-error@+4 
{{matrix dimension must be greater than 0}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if Matrix M dimension is less than Max +void test_invalid_matrix_M_dimension_less_than_Max() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = matrix_dimK * 4; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM_0 = 1025; + + // expected-error@+3 {{matrix dimension M must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_0, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimM_1 = 4097; + + // expected-error@+3 {{matrix dimension M must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_1, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if Matrix K dimension is less than Max in unpacked input vector case +void test_invalid_matrix_K_dimension_less_than_Max_unpacked_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK_0 = 1025; + + // expected-error@+4 {{matrix dimension K when using unpacked input vectors must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8; + const uint matrix_dimK_1 = 4096; + // expected-error@+4 {{matrix dimension K when using 
unpacked input vectors must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + +} + +// Check if Matrix M dimension is less than Max in packed input vector case +void test_invalid_matrix_M_dimension_less_than_Max_packed_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 1024; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 4096; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimK_0 = 4097; + + // expected-error@+4 {{matrix dimension K when using packed input vectors must be less than 4096, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +void test_invalid_input_interpretation_non_const() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint input_interpretation = constants_buffer.Load(0); + + // expected-error@+2 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if InputInterpretation is a valid value +void test_invalid_input_interpretation_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint input_interpretation_0 = 0; + + // expected-error@+2 {{0 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + 
is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_1 = 1; + + // expected-error@+2 {{1 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_2 = 6; + + // expected-error@+2 {{6 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_2, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_3 = 7; + + // expected-error@+2 {{7 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_3, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_4 = 10; + + // expected-error@+2 {{10 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_4, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_5 = 11; + + // expected-error@+2 {{11 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_5, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_6 = 12; + + // expected-error@+2 {{12 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_6, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_7 = 13; + + // expected-error@+2 {{13 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_7, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_8 = 14; + + // expected-error@+2 {{14 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_8, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + 
matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_9 = 15; + + // expected-error@+2 {{15 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_9, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_10 = 16; + + // expected-error@+2 {{16 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_10, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_11 = 23; + + // expected-error@+2 {{23 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_11, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_12 = 100; + + // expected-error@+2 {{100 is an invalid register interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_12, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} +// Check if Input and Output vector dimensions are valid -non packed +void test_invalid_input_output_vector_dimensions_non_packed_square_matrix() { + + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 32; + const uint matrix_dimK = 32; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector output_vector_0; + vector input_vector_0 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_0, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector output_vector_1; + vector input_vector_1 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_1, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if Input and Output vector dimensions are valid 
-non packed +void test_invalid_input_output_vector_dimensions_non_packed_rectangle_matrix() { + + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 16; + const uint matrix_dimK = 32; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + // Use dimension of Matrix K to trigger error + vector output_vector_0; + vector input_vector_0 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_0, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + // Check off by 1 errors + vector output_vector_1; + vector input_vector_1 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_1, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + // Check off by 1 errors + vector output_vector_2; + vector input_vector_2 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_2, is_output_unsigned, input_vector_2, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + // Use dimension of Matrix M to trigger error + vector output_vector_3; + vector input_vector_3 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_3, is_output_unsigned, input_vector_3, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + // Check off by 1 errors + vector output_vector_4; + vector input_vector_4 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_4, is_output_unsigned, input_vector_4, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + // Check off by 1 errors + vector output_vector_5; + vector input_vector_5 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length 
must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_5, is_output_unsigned, input_vector_5, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + // Swap dimensions to trigger error + vector output_vector_6; + vector input_vector_6 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMulAdd(output_vector_6, is_output_unsigned, input_vector_6, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if matrtrix interpretation is a constant value +void test_invalid_matrix_interpretation_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_interpretation_0 = constants_buffer.Load(0); + + // expected-error@+3 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_0, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check for invalid matrix interpretation value +void test_invalid_matrix_interpretation_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_interpretation_0 = 0; + + // expected-error@+3 {{0 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_0, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_1 = 1; + + // expected-error@+3 {{1 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_1, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + 
matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_2 = 6; + + // expected-error@+3 {{6 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_2, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_3 = 7; + + // expected-error@+3 {{7 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_3, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_4 = 10; + + // expected-error@+3 {{10 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_4, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_5 = 11; + + // expected-error@+3 {{11 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_5, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_6 = 12; + + // expected-error@+3 {{12 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_6, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_7 = 13; + + // expected-error@+3 {{13 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_7, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_8 = 14; + + // expected-error@+3 {{14 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_8, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_9 = 15; + + // expected-error@+3 {{15 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_9, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_10 = 16; + + // expected-error@+3 {{16 is an invalid memory interpretation 
value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_10, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_11 = 23; + // expected-error@+3 {{23 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_11, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_interpretation_12 = 100; + + // expected-error@+3 {{100 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_12, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if matrix Layout is a constant value +void test_invalid_matrix_layout_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_layout = constants_buffer.Load(0); + + // expected-error@+4 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check invalid matrix layout value +void test_invalid_matrix_layout_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_layout_0 = 4; + + // expected-error@+4 {{matrix layout 4 is not valid, must be in the range [0, 3]}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_0, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if matrix is transposed is a constant value +void test_invalid_matrix_transposed_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + 
const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = constants_buffer.Load(0); + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + // expected-error@+4 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if invalid matrix transpose value is used +void test_invalid_matrix_transpose_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint matrix_layout_0 = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed_0 = true; + + // expected-error@+4 {{RowMajor and ColumnMajor matrices are not transposable}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_0, matrix_is_transposed_0, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_layout_1 = MatrixLayout::MATRIX_LAYOUT_COLUMN_MAJOR; + const bool matrix_is_transposed_1 = true; + + // expected-error@+4 {{RowMajor and ColumnMajor matrices are not transposable}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_1, matrix_is_transposed_1, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + + +// Check invalid matrix stride value for optimal matrix layout +void test_invalid_matrix_stride_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const bool matrix_is_transposed = false; + + const uint matrix_layout_0 = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL; + const uint matrix_stride_0 = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + // expected-error@+5 {{for optimal matrix layout, matrix stride must be 0}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, 
matrix_layout_0, matrix_is_transposed, + matrix_stride_0, bias_buffer, bias_offset, bias_interpretation); + + const uint matrix_layout_1 = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + const uint matrix_stride_1 = 64; + + // expected-error@+5 {{for optimal matrix layout, matrix stride must be 0}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_1, matrix_is_transposed, + matrix_stride_1, bias_buffer, bias_offset, bias_interpretation); +} + +// Check bias interpretation is not a constant value +void test_invalid_bias_interpretation() { + vector output_vector; + const uint is_output_unsigned = 0; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const uint matrix_is_transposed = 0; + const uint matrix_stride = 0; + const uint bias_offset = 0; + + const uint bias_interpretation_0 = constants_buffer.Load(0); + + // expected-error@+6 {{expression is not an integer constant expression}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_0); +} + +// Check bias interpretation is not a valid value +void test_invalid_bias_interpretation_value() { + vector output_vector; + const uint is_output_unsigned = 0; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const uint matrix_is_transposed = 0; + const uint matrix_stride = 0; + const uint bias_offset = 0; + + const uint bias_interpretation_0 = 0; + + // expected-error@+6 {{0 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_0); + + const uint bias_interpretation_1 = 1; + + // expected-error@+6 {{1 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_1); + + const uint bias_interpretation_2 = 6; + + // expected-error@+6 {{6 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + 
matrix_stride, bias_buffer, bias_offset, + bias_interpretation_2); + + const uint bias_interpretation_3 = 7; + + // expected-error@+6 {{7 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_3); + + const uint bias_interpretation_4 = 10; + + // expected-error@+6 {{10 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_4); + + const uint bias_interpretation_5 = 11; + + // expected-error@+6 {{11 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_5); + + const uint bias_interpretation_6 = 12; + + // expected-error@+6 {{12 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_6); + + const uint bias_interpretation_7 = 13; + + // expected-error@+6 {{13 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_7); + + const uint bias_interpretation_8 = 14; + + // expected-error@+6 {{14 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_8); + + const uint bias_interpretation_9 = 15; + + // expected-error@+6 {{15 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_9); + + const uint bias_interpretation_10 = 16; + + // expected-error@+6 {{16 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_10); + + const uint bias_interpretation_11 = DataType::DATA_TYPE_SINT8_T4_PACKED; + + // expected-error@+6 
{{17 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_11); + + const uint bias_interpretation_12 = DataType::DATA_TYPE_UINT8_T4_PACKED; + + // expected-error@+6 {{18 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_12); + + const uint bias_interpretation_13 = 23; + + // expected-error@+6 {{23 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_13); + + const uint bias_interpretation_14 = 100; + + // expected-error@+6 {{100 is an invalid memory interpretation value}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, + bias_interpretation_14); + } diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_add_valid.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_add_valid.hlsl new file mode 100644 index 0000000000..4b0bd6dd87 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_add_valid.hlsl @@ -0,0 +1,244 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 %s + +#include + +using namespace dx::linalg; + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer matrix_buffer; +ByteAddressBuffer bias_buffer; +RWByteAddressBuffer output_vector_buffer; +ByteAddressBuffer constants_buffer; + +// Check valid input vector packed types +void test_valid_input_vector_packed_types() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 1; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + 
+ // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + +} + +// IsInputUnsigned must be true for packed input vector type +void test_valid_is_input_unsigned_packed_input_vector_type() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 1; + + // expected-no-diagnostics@+2 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + + // expected-no-diagnostics@+2 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check packed input vector dimension +void test_valid_packed_input_vector_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_UINT8; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL; + const bool matrix_is_transposed = false; + const uint matrix_stride = 0; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_UINT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_0 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_1 = 7; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if Matrix M 
dimension is less than Max +void test_valid_matrix_M_dimension_less_than_Max() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = matrix_dimK * 4; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM_0 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_0, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimM_1 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_1, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + +// Check if Matrix K dimension is less than Max in unpacked input vector case +void test_valid_matrix_K_dimension_less_than_Max_unpacked_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK_0 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8; + const uint matrix_dimK_1 = 4; + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); + +} + +// Check if Matrix M dimension is less than Max in packed input vector case +void test_valid_matrix_M_dimension_less_than_Max_packed_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + 
const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimK_0 = 4096; + + // expected-no-diagnostics@+1 + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride, bias_buffer, bias_offset, bias_interpretation); +} + + + diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_invalid.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_invalid.hlsl new file mode 100644 index 0000000000..14f34d62c4 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_invalid.hlsl @@ -0,0 +1,1156 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s -verify + +#include + +using namespace dx::linalg; + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer matrix_buffer; +RWByteAddressBuffer output_vector_buffer; +ByteAddressBuffer constants_buffer; + +// Output vector, isUnsigned mismatch +void test_invalid_output_vector_type() { + + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector output_vector_0; + const uint is_output_unsigned_0 = 0; + + // expected-error@+1 {{IsOuputUnsigned must be true for vector of unsigned integer type}} + __builtin_MatVecMul(output_vector_0, is_output_unsigned_0, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector output_vector_1; + const uint is_output_unsigned_1 = 1; + + // expected-error@+1 {{IsOuputUnsigned must be false for vector of signed integer type}} + __builtin_MatVecMul(output_vector_1, is_output_unsigned_1, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector output_vector_2; + const uint is_output_unsigned_2 = 1; + + // expected-error@+1 {{IsOuputUnsigned must be false for vector of floating point type}} + __builtin_MatVecMul(output_vector_2, is_output_unsigned_2, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// IsOutputUnsigned is not a constant parameter +void test_invalid_is_output_unsigned_non_const() { + + vector output_vector_0; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint 
matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint is_output_unsigned_0 = constants_buffer.Load(0); + + // expected-error@+1 {{expression is not an integer constant expression}} + __builtin_MatVecMul(output_vector_0, is_output_unsigned_0, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Input vector is incorrect type - 64 bit types +void test_invalid_input_vector_type() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 0; + +// expected-error@+2 {{no matching function for call to '__builtin_MatVecMul'}} +// expected-note@+1 {{candidate function not viable: no known conversion from 'vector' to 'vector' for 3rd argument}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + +// expected-error@+2 {{no matching function for call to '__builtin_MatVecMul'}} +// expected-note@+1 {{candidate function not viable: no known conversion from 'vector' to 'vector' for 3rd argument}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_2 = 0; + +// expected-error@+2 {{no matching function for call to '__builtin_MatVecMul'}} +// expected-note@+1 {{candidate function not viable: no known conversion from 'vector' to 'vector' for 3rd argument}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned_2, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Input vector is incorrect type for packed InputInterpretation +void test_invalid_input_vector_type_packed_input_interpretation() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint input_interpretation_0 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 1; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in 
linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 0; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_2 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_2 = 1; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned_2, input_interpretation_2, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_3 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_3 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_3 = 0; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_3, + is_input_unsigned_3, input_interpretation_3, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_4 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_4 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_4 = 0; + + // expected-error@+1 {{packed input vector type must be a 32-bit unsigned int type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_4, + is_input_unsigned_4, input_interpretation_4, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// IsInputUnsigned must be true for packed input vector type +void test_invalid_is_input_unsigned_packed_input_vector_type() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_FLOAT32; + + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 0; + + // expected-error@+2 {{IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, 
input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 0; + + // expected-error@+2 {{IsInputUnsigned must be true for packed input interpretations in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check packed input vector dimension +void test_invalid_packed_input_vector_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_UINT8; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL; + const bool matrix_is_transposed = false; + const uint matrix_stride = 0; + const uint bias_offset = 0; + const uint bias_interpretation = DataType::DATA_TYPE_UINT32; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_0 = 4; + + // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_1 = 7; + + // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_2 = 7; + + // expected-error@+1 {{packed input vector length must be the smallest number that can hold matrix dim K values of the packed(smaller) type in linalg mul/muladd operations}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_2, matrix_layout, matrix_is_transposed, + matrix_stride); + +} + +// Input vector type/isInputUnsigned mismatch +void test_invalid_input_vector_type_mismatch() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint 
is_input_unsigned_0 = 0; + + // expected-error@+2 {{IsInputUnsigned must be true for vector of unsigned integer type}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + + // expected-error@+2 {{IsInputUnsigned must be false for vector of signed integer type}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_2 = 1; + + // expected-error@+2 {{IsInputUnsigned must be false for vector of floating point type}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned_2, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix M dimension is a constant parameter +void test_invalid_matrix_M_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_dimM = constants_buffer.Load(0); + + // expected-error@+3 {{expression is not an integer constant expression}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix K dimension is a constant parameter +void test_invalid_matrix_K_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_dimK = constants_buffer.Load(0); + + // expected-error@+4 {{expression is not an integer constant expression}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix M dimension is non-zero +void test_invalid_matrix_M_dimension_non_zero() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0;
+ const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_dimM = 0; + // expected-error@+3 {{matrix dimension must be greater than 0}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix K dimension is non-zero +void test_invalid_matrix_K_dimension_non_zero() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_dimK = 0; + // expected-error@+4 {{matrix dimension must be greater than 0}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix M dimension is less than Max +void test_invalid_matrix_M_dimension_less_than_Max() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = matrix_dimK * 4; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM_0 = 1025; + + // expected-error@+3 {{matrix dimension M must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_0, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimM_1 = 4097; + + // expected-error@+3 {{matrix dimension M must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_1, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix K dimension is less than Max in unpacked input vector case +void test_invalid_matrix_K_dimension_less_than_Max_unpacked_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = 
MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK_0 = 1025; + + // expected-error@+4 {{matrix dimension K when using unpacked input vectors must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8; + const uint matrix_dimK_1 = 4096; + // expected-error@+4 {{matrix dimension K when using unpacked input vectors must be less than 1024, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride); + +} + +// Check if Matrix M dimension is less than Max in packed input vector case +void test_invalid_matrix_M_dimension_less_than_Max_packed_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 1024; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 4096; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimK_0 = 4097; + + // expected-error@+4 {{matrix dimension K when using packed input vectors must be less than 4096, in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +//Check if InputInterpretation is a constant parameter +void test_invalid_input_interpretation_non_const() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint input_interpretation = constants_buffer.Load(0); + + // expected-error@+2 {{expression is not an integer constant expression}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if InputInterpretation is a valid value +void test_invalid_input_interpretation_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint 
is_input_unsigned = 0; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint input_interpretation_0 = 0; + + // expected-error@+2 {{0 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_1 = 1; + + // expected-error@+2 {{1 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_2 = 6; + + // expected-error@+2 {{6 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_2, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_3 = 7; + + // expected-error@+2 {{7 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_3, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_4 = 10; + + // expected-error@+2 {{10 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_4, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_5 = 11; + + // expected-error@+2 {{11 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_5, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_6 = 12; + + // expected-error@+2 {{12 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_6, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_7 = 13; + + // expected-error@+2 {{13 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_7, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_8 = 14; + + // expected-error@+2 {{14 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_8, 
matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_9 = 15; + + // expected-error@+2 {{15 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_9, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_10 = 16; + + // expected-error@+2 {{16 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_10, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_11 = 23; + + // expected-error@+2 {{23 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_11, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_12 = 100; + + // expected-error@+2 {{100 is an invalid register interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation_12, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} +// Check if Input and Output vector dimensions are valid -non packed +void test_invalid_input_output_vector_dimensions_non_packed_square_matrix() { + + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 32; + const uint matrix_dimK = 32; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector output_vector_0; + vector input_vector_0 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_0, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector output_vector_1; + vector input_vector_1 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_1, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Input and Output vector dimensions are valid -non packed +void test_invalid_input_output_vector_dimensions_non_packed_rectangle_matrix() { + + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = 
DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 16; + const uint matrix_dimK = 32; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + // Use dimension of Matrix K to trigger error + vector output_vector_0; + vector input_vector_0 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_0, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + // Check off by 1 errors + vector output_vector_1; + vector input_vector_1 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_1, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + // Check off by 1 errors + vector output_vector_2; + vector input_vector_2 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_2, is_output_unsigned, input_vector_2, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + // Use dimension of Matrix M to trigger error + vector output_vector_3; + vector input_vector_3 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_3, is_output_unsigned, input_vector_3, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + // Check off by 1 errors + vector output_vector_4; + vector input_vector_4 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_4, is_output_unsigned, input_vector_4, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + // Check off by 1 errors + vector output_vector_5; + vector input_vector_5 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{unpacked input vector length must be equal to Matrix K dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_5, is_output_unsigned, input_vector_5, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + // Swap dimensions to trigger error + vector output_vector_6; + vector input_vector_6 = + input_vector_buffer.Load >(0); + + // expected-error@+1 {{output vector length must be equal to Matrix M dimension in a linalg Mul/MulAdd operation}} + __builtin_MatVecMul(output_vector_6, is_output_unsigned, input_vector_6, 
+ is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if matrix interpretation is a constant value +void test_invalid_matrix_interpretation_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_interpretation_0 = constants_buffer.Load(0); + + // expected-error@+3 {{expression is not an integer constant expression}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_0, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check for invalid matrix interpretation value +void test_invalid_matrix_interpretation_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_interpretation_0 = 0; + + // expected-error@+3 {{0 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_0, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_1 = 1; + + // expected-error@+3 {{1 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_1, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_2 = 6; + + // expected-error@+3 {{6 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_2, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_3 = 7; + + // expected-error@+3 {{7 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_3, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_4 = 10; + + // expected-error@+3 {{10 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, 
matrix_interpretation_4, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_5 = 11; + + // expected-error@+3 {{11 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_5, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_6 = 12; + + // expected-error@+3 {{12 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_6, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_7 = 13; + + // expected-error@+3 {{13 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_7, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_8 = 14; + + // expected-error@+3 {{14 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_8, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_9 = 15; + + // expected-error@+3 {{15 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_9, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_10 = 16; + + // expected-error@+3 {{16 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_10, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_11 = 23; + // expected-error@+3 {{23 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_11, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint matrix_interpretation_12 = 100; + + // expected-error@+3 {{100 is an invalid memory interpretation value}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation_12, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if matrix Layout is a constant value +void test_invalid_matrix_layout_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + 
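+  // Note: as with the matrix interpretation argument tested above, the +  // matrix layout argument of __builtin_MatVecMul must fold to an integer +  // constant at compile time. The call below deliberately loads the layout +  // from a buffer instead of using a literal such as +  // MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR, so it is expected to be rejected +  // as a non-constant expression.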
const uint matrix_dimK = 4; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_layout = constants_buffer.Load(0); + + // expected-error@+4 {{expression is not an integer constant expression}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check invalid matrix layout value +void test_invalid_matrix_layout_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint matrix_layout_0 = 4; + + // expected-error@+4 {{matrix layout 4 is not valid, must be in the range [0, 3]}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_0, matrix_is_transposed, + matrix_stride); +} + +// Check if matrix is transposed is a constant value +void test_invalid_matrix_transposed_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = constants_buffer.Load(0); + const uint matrix_stride = 64; + + // expected-error@+4 {{expression is not an integer constant expression}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if invalid matrix transpose value is used +void test_invalid_matrix_transpose_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_stride = 64; + + const uint matrix_layout_0 = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed_0 = true; + + // expected-error@+4 {{RowMajor and ColumnMajor matrices are not transposable}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_0, matrix_is_transposed_0, + matrix_stride); + + const uint matrix_layout_1 = MatrixLayout::MATRIX_LAYOUT_COLUMN_MAJOR; + const bool matrix_is_transposed_1 = true; + + // expected-error@+4 {{RowMajor and ColumnMajor matrices are not 
transposable}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_1, matrix_is_transposed_1, + matrix_stride); +} + + +// Check invalid matrix stride value for optimal matrix layout +void test_invalid_matrix_stride_constant_value() { + + vector output_vector; + const uint is_output_unsigned = 1; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const bool matrix_is_transposed = false; + + const uint matrix_layout_0 = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL; + const uint matrix_stride_0 = 64; + + // expected-error@+5 {{for optimal matrix layout, matrix stride must be 0}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_0, matrix_is_transposed, + matrix_stride_0); + + const uint matrix_layout_1 = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + const uint matrix_stride_1 = 64; + + // expected-error@+5 {{for optimal matrix layout, matrix stride must be 0}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout_1, matrix_is_transposed, + matrix_stride_1); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_valid.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_valid.hlsl new file mode 100644 index 0000000000..5972b22b95 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/mul_valid.hlsl @@ -0,0 +1,344 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s -verify + +#include + +using namespace dx::linalg; + +ByteAddressBuffer input_vector_buffer; +ByteAddressBuffer matrix_buffer; +RWByteAddressBuffer output_vector_buffer; +ByteAddressBuffer const_buffer; + +// Output vector, isUnsigned mismatch +void test_valid_output_vector_type() { + + vector input_vector = input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector output_vector_0; + const uint is_output_unsigned_0 = 1; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector_0, is_output_unsigned_0, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride); + + vector output_vector_1; + const uint is_output_unsigned_1 = 0; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector_1, is_output_unsigned_1, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride); + + vector output_vector_2; 
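+  // Third accepted variant: together with the two calls above, this covers +  // both 0 and 1 for the is_output_unsigned flag. All three calls are +  // expected to compile without diagnostics as long as the dimension and +  // constant-expression requirements exercised by the companion invalid-case +  // tests are satisfied.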
+ const uint is_output_unsigned_2 = 0; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector_2, is_output_unsigned_2, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride); +} + +void test_valid_is_output_unsigned_non_const() { + + vector output_vector_0; + vector input_vector = + input_vector_buffer.Load >(0); + const uint is_input_unsigned = 0; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint is_output_unsigned_0 = 1; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector_0, is_output_unsigned_0, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Input vector is incorrect type +void test_valid_input_vector_type() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 0; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_2 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_2 = 0; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_2, + is_input_unsigned_2, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check valid input vector packed types +void test_valid_input_vector_packed_types() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint 
is_input_unsigned_0 = 1; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + +} + +// IsInputUnsigned must be true for packed input vector type +void test_valid_is_input_unsigned_packed_input_vector_type() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_0 = 1; + + // expected-no-diagnostics@+2 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned_0, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + const uint input_interpretation_1 = DataType::DATA_TYPE_SINT8_T4_PACKED; + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint is_input_unsigned_1 = 1; + + // expected-no-diagnostics@+2 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned_1, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check packed input vector dimension +void test_valid_packed_input_vector_dimension() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint input_interpretation = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_UINT8; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL; + const bool matrix_is_transposed = false; + const uint matrix_stride = 0; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_0 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint matrix_dimK_1 = 7; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, 
matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix M dimension is less than Max +void test_valid_matrix_M_dimension_less_than_Max() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = matrix_dimK * 4; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM_0 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_0, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint matrix_dimM_1 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM_1, + matrix_dimK, matrix_layout, matrix_is_transposed, + matrix_stride); +} + +// Check if Matrix K dimension is less than Max in unpacked input vector case +void test_valid_matrix_K_dimension_less_than_Max_unpacked_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimK_0 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride); + + vector input_vector_1 = + input_vector_buffer.Load >(0); + const uint input_interpretation_1 = DataType::DATA_TYPE_UINT8; + const uint matrix_dimK_1 = 4; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_1, + is_input_unsigned, input_interpretation_1, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_1, matrix_layout, matrix_is_transposed, + matrix_stride); + +} + +// Check if Matrix M dimension is less than Max in packed input vector case +void test_valid_matrix_M_dimension_less_than_Max_packed_input_vector() { + + vector output_vector; + const uint is_output_unsigned = 1; + const uint is_input_unsigned = 1; + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_dimM = 4; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + vector input_vector_0 = + input_vector_buffer.Load >(0); + const uint input_interpretation_0 = DataType::DATA_TYPE_UINT8_T4_PACKED; + const uint 
matrix_dimK_0 = 4096; + + // expected-no-diagnostics@+1 + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector_0, + is_input_unsigned, input_interpretation_0, matrix_buffer, + matrix_offset, matrix_interpretation, matrix_dimM, + matrix_dimK_0, matrix_layout, matrix_is_transposed, + matrix_stride); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/outer_product_accumulate_invalid.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/outer_product_accumulate_invalid.hlsl new file mode 100644 index 0000000000..4e15c92a5d --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/outer_product_accumulate_invalid.hlsl @@ -0,0 +1,256 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s -verify + +#include + +using namespace dx::linalg; + +ByteAddressBuffer input_vector_buffer; +RWByteAddressBuffer accumulate_buffer; +ByteAddressBuffer constants_buffer; + +// Check if input vectors aren't the same component type +void test_invalid_input_vector_component_type() { + + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + const uint matrix_stride = 0; + + vector input_vector_0_0 = input_vector_buffer.Load >(0); + vector input_vector_1_0 = input_vector_buffer.Load >(0); + + // expected-error@+1 {{input vectors of outerproductaccumulate must have the same element type}} + __builtin_OuterProductAccumulate(input_vector_0_0, input_vector_1_0, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); + + vector input_vector_0_1 = input_vector_buffer.Load >(0); + vector input_vector_1_1 = input_vector_buffer.Load >(0); + + // expected-error@+1 {{input vectors of outerproductaccumulate must have the same element type}} + __builtin_OuterProductAccumulate(input_vector_0_1, input_vector_1_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); +} + +// Check for non constant matrix interpretation +void test_non_constant_matrix_interpretation() { + + vector input_vector_0 = input_vector_buffer.Load >(0); + vector input_vector_1 = input_vector_buffer.Load >(0); + const uint matrix_offset = 0; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + const uint matrix_stride = 0; + + const uint matrix_interpretation = constants_buffer.Load(0); + + // expected-error@+3 {{expression is not an integer constant expression}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); +} + +// Check for matrix interpretation is not a valid value +void test_invalid_matrix_interpretation() { + + vector input_vector_0 = input_vector_buffer.Load >(0); + vector input_vector_1 = input_vector_buffer.Load >(0); + const uint matrix_offset = 0; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + const uint matrix_stride = 0; + + const uint matrix_interpretation = 0; + + // expected-error@+3 {{0 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_2 = 1; + + // expected-error@+3 {{1 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + 
matrix_interpretation_2, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_3 = 6; + + // expected-error@+3 {{6 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_3, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_4 = 7; + + // expected-error@+3 {{7 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_4, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_5 = 10; + + // expected-error@+3 {{10 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_5, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_6 = 11; + + // expected-error@+3 {{11 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_6, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_7 = 12; + + // expected-error@+3 {{12 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_7, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_8 = 13; + + // expected-error@+3 {{13 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_8, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_9 = 14; + + // expected-error@+3 {{14 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_9, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_10 = 15; + + // expected-error@+3 {{15 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_10, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_11 = 16; + + // expected-error@+3 {{16 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_11, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_12 = DataType::DATA_TYPE_SINT8_T4_PACKED; + + // expected-error@+3 {{17 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_12, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_13 = DataType::DATA_TYPE_UINT8_T4_PACKED; + + // expected-error@+3 {{18 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_13, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_14 = 23; + + // expected-error@+3 {{23 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_14, matrix_layout, + matrix_stride); + + const uint matrix_interpretation_15 
= 100; + + // expected-error@+3 {{100 is an invalid memory interpretation value}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation_15, matrix_layout, + matrix_stride); + +} + +// Check for matrix layout is not a constant parameter +void test_non_constant_matrix_layout() { + + vector input_vector_0 = input_vector_buffer.Load >(0); + vector input_vector_1 = input_vector_buffer.Load >(0); + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_stride = 0; + + const uint matrix_layout = constants_buffer.Load(0); + + // expected-error@+3 {{expression is not an integer constant expression}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); +} + +// Check for matrix layout is not a valid value +void test_invalid_matrix_layout() { + + vector input_vector_0 = input_vector_buffer.Load >(0); + vector input_vector_1 = input_vector_buffer.Load >(0); + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_stride = 0; + + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_ROW_MAJOR; + + // expected-error@+3 {{matrix layout for outerproductaccumulate must be 3}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); + + const uint matrix_layout_2 = MatrixLayout::MATRIX_LAYOUT_COLUMN_MAJOR; + + // expected-error@+3 {{matrix layout for outerproductaccumulate must be 3}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout_2, + matrix_stride); + + const uint matrix_layout_3 = MatrixLayout::MATRIX_LAYOUT_MUL_OPTIMAL; + + // expected-error@+3 {{matrix layout for outerproductaccumulate must be 3}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout_3, + matrix_stride); + +} + +// Check for matrix stride is zero, if constant +void test_zero_matrix_stride() { + + vector input_vector_0 = input_vector_buffer.Load >(0); + vector input_vector_1 = input_vector_buffer.Load >(0); + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + + const uint matrix_stride = 16; + + // expected-error@+4 {{for optimal matrix layout, matrix stride must be 0}} + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/outer_product_accumulate_valid.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/outer_product_accumulate_valid.hlsl new file mode 100644 index 0000000000..85298e2dbb --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/builtins/outer_product_accumulate_valid.hlsl @@ -0,0 +1,66 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s -verify + +#include + +using namespace dx::linalg; + +ByteAddressBuffer input_vector_buffer; +RWByteAddressBuffer accumulate_buffer; +ByteAddressBuffer constants_buffer; + +// Check for input vectors aren't the same component type +void test_invalid_input_vector_component_type() { + + const uint 
matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + const uint matrix_stride = 0; + + vector input_vector_0_0 = input_vector_buffer.Load >(0); + vector input_vector_1_0 = input_vector_buffer.Load >(0); + + // expected-no-diagnostics@+1 + __builtin_OuterProductAccumulate(input_vector_0_0, input_vector_1_0, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); + + vector input_vector_0_1 = input_vector_buffer.Load >(0); + vector input_vector_1_1 = input_vector_buffer.Load >(0); + + // expected-no-diagnostics@+1 + __builtin_OuterProductAccumulate(input_vector_0_1, input_vector_1_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); + + vector input_vector_0_2 = input_vector_buffer.Load >(0); + vector input_vector_1_2 = input_vector_buffer.Load >(0); + + // expected-no-diagnostics@+1 + __builtin_OuterProductAccumulate(input_vector_0_2, input_vector_1_2, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); +} + +// Check for non constant matrix stride +void test_non_constant_matrix_stride() { + + vector input_vector_0 = input_vector_buffer.Load >(0); + vector input_vector_1 = input_vector_buffer.Load >(0); + const uint matrix_offset = 0; + const uint matrix_interpretation = DataType::DATA_TYPE_FLOAT32; + const uint matrix_layout = MatrixLayout::MATRIX_LAYOUT_OUTER_PRODUCT_OPTIMAL; + + const uint matrix_stride = constants_buffer.Load(0); + + // expected-no-diagnostics@+4 + __builtin_OuterProductAccumulate(input_vector_0, input_vector_1, + accumulate_buffer, matrix_offset, + matrix_interpretation, matrix_layout, + matrix_stride); +} + +// Check for matrix stride is not a valid value + diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/make-interp-vec-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/make-interp-vec-errors.hlsl new file mode 100644 index 0000000000..be67d92546 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/make-interp-vec-errors.hlsl @@ -0,0 +1,33 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 %s -verify + +#include +ByteAddressBuffer Buf; + +export float4 Test1(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // expected-error@+3{{no matching function for call to 'MakeInterpretedVector'}} + // expected-note@dx/linalg.h:113{{candidate template ignored: invalid explicitly-specified argument for template parameter 'DT'}} + return Mul( + Matrix, MakeInterpretedVector<2>(Input)); +} + +enum DataType { + DATA_TYPE_InvalidType = 40 +}; + +export float4 Test2(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // expected-error@+3{{no matching function for call to 'MakeInterpretedVector'}} + // expected-note@dx/linalg.h:113{{candidate template ignored: invalid explicitly-specified argument for template parameter 'DT'}} + return Mul( + Matrix, MakeInterpretedVector(Input)); +} + diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-errors.hlsl new file mode 100644 index 0000000000..b911de648e --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-errors.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 %s -verify + +#include + +ByteAddressBuffer Buf; + +vector MixUpVectorAndMatrixArguments(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 
0, 0}; + + // expected-error@+2{{no matching function for call to 'Mul'}} + // expected-note@dx/linalg.h:127{{candidate template ignored: could not match 'MatrixRefImpl' against 'InterpretedVector'}} + return Mul(MakeInterpretedVector(Input), Matrix); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-transpose-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-transpose-errors.hlsl new file mode 100644 index 0000000000..2018acafab --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-mul-transpose-errors.hlsl @@ -0,0 +1,30 @@ +// XFAIL: * +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s -verify + +#include + +ByteAddressBuffer Buf; + +export float4 Test1(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // PREVIEW CHECK TODO: + // expected-error@+1{{something about transposing not supported for rowmajor / colmajor layouts}} + return Mul( + Matrix, MakeInterpretedVector(Input)); +} + +export vector Test2(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // PREVIEW CHECK TODO: + // expected-error@+1{{something about transposing not supported for rowmajor / colmajor layouts}} + return Mul(Matrix, + MakeInterpretedVector(Input)); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-muladd-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-muladd-errors.hlsl new file mode 100644 index 0000000000..24ad3ef46c --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/mat-vec-muladd-errors.hlsl @@ -0,0 +1,16 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 %s -verify + +#include + +ByteAddressBuffer Buf; + +vector MixUpVectorAndMatrixArguments(vector Input) { + using namespace dx::linalg; + + MatrixRef Matrix = { + Buf, 0, 0}; + + // expected-error@+2{{no matching function for call to 'MulAdd'}} + // expected-note@dx/linalg.h:153{{candidate template ignored: could not match 'MatrixRefImpl' against 'InterpretedVector'}} + return MulAdd(MakeInterpretedVector(Input), Matrix, MakeInterpretedVector(Input)); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-errors.hlsl new file mode 100644 index 0000000000..5759631bcb --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/outerproductaccumulate-errors.hlsl @@ -0,0 +1,44 @@ +// RUN: %dxc -I %hlsl_headers -T lib_6_9 -enable-16bit-types %s -verify + +#include + +RWByteAddressBuffer RWBuf; + +// test for inputs of different size +export void Test4(vector Input1, vector Input2) { + using namespace dx::linalg; + + RWMatrixRef + matrix = {RWBuf, 0, 0}; + + // expected-error@+3{{no matching function for call to 'OuterProductAccumulate'}} + // expected-note@dx/linalg.h:177{{candidate template ignored: could not match 0 against 1}} + + OuterProductAccumulate(Input1, Input2, matrix); +} + +// now test for an error when element types differ +export void Test5(vector Input1, vector Input2) { + using namespace dx::linalg; + + RWMatrixRef + matrix = {RWBuf, 0, 0}; + + // expected-error@+3{{no matching function for call to 'OuterProductAccumulate'}} + // expected-note@dx/linalg.h:177{{candidate template ignored: could not match 0 against 1}} + + OuterProductAccumulate(Input1, Input2, matrix); +} + +// now test for an error when matrix transpose parameter is true +export void Test4(vector Input1, vector Input2) { + using namespace dx::linalg; + + RWMatrixRef + matrix = {RWBuf, 0, 0}; + + // expected-error@+3{{no matching 
function for call to 'OuterProductAccumulate'}} + // expected-note@dx/linalg.h:177{{candidate template ignored: deduced conflicting types for parameter 'ElTy' ('int' vs. 'unsigned int')}} + + OuterProductAccumulate(Input1, Input2, matrix); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/unavailable-pre-sm69.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/unavailable-pre-sm69.hlsl new file mode 100644 index 0000000000..57683b9a59 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/unavailable-pre-sm69.hlsl @@ -0,0 +1,59 @@ +// RUN: %dxc -T lib_6_8 %s -verify + +ByteAddressBuffer matrix_buffer; +ByteAddressBuffer bias_buffer; +RWByteAddressBuffer rw_matrix_buffer; + +[Shader("compute")] +[Numthreads(1,1,1)] +void cs_main() +{ + vector output_vector; + static const uint is_output_unsigned = 0; + + vector input_vector; + const uint is_input_unsigned = 0; + const uint input_interpretation = 9; /*F32*/ + + const uint matrix_offset = 0; + const uint matrix_interpretation = 9; /*F32*/ + const uint matrix_dimM = 4; + const uint matrix_dimK = 4; + const uint matrix_layout = 0; /*RowMajor*/ + const bool matrix_is_transposed = false; + const uint matrix_stride = 64; + + //expected-error@+1{{intrinsic hlsl::__builtin_MatVecMul potentially used by ''cs_main'' requires shader model 6.9 or greater}} + __builtin_MatVecMul(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride); + + const uint bias_offset = 0; + const uint bias_interpretation = 9; /*F32*/ + + //expected-error@+1{{intrinsic hlsl::__builtin_MatVecMulAdd potentially used by ''cs_main'' requires shader model 6.9 or greater}} + __builtin_MatVecMulAdd(output_vector, is_output_unsigned, input_vector, + is_input_unsigned, input_interpretation, matrix_buffer, matrix_offset, + matrix_interpretation, matrix_dimM, matrix_dimK, matrix_layout, + matrix_is_transposed, matrix_stride, bias_buffer, bias_offset, + bias_interpretation); + + vector input_vector1; + vector input_vector2; + const uint opa_matrix_offset = 0; + const uint opa_matrix_interpretation = 5; /*U32*/ + const uint opa_matrix_layout = 3; /*OuterProductOptimal*/ + const uint opa_matrix_stride = 0; + + //expected-error@+1{{intrinsic hlsl::__builtin_OuterProductAccumulate potentially used by ''cs_main'' requires shader model 6.9 or greater}} + __builtin_OuterProductAccumulate(input_vector1, input_vector2, + rw_matrix_buffer, opa_matrix_offset, opa_matrix_interpretation, + opa_matrix_layout, opa_matrix_stride); + + const uint va_matrix_offset = 0; + + //expected-error@+1{{intrinsic hlsl::__builtin_VectorAccumulate potentially used by ''cs_main'' requires shader model 6.9 or greater}} + __builtin_VectorAccumulate(input_vector1, rw_matrix_buffer, + va_matrix_offset); +} \ No newline at end of file diff --git a/tools/clang/test/SemaHLSL/hlsl/linalg/vectoraccumulate-errors.hlsl b/tools/clang/test/SemaHLSL/hlsl/linalg/vectoraccumulate-errors.hlsl new file mode 100644 index 0000000000..4c8ae6f049 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/linalg/vectoraccumulate-errors.hlsl @@ -0,0 +1,16 @@ +// XFAIL: * +// RUN: %dxc -I %hlsl_headers -T lib_6_9 %s | FileCheck %s + +#include + +RWByteAddressBuffer RWBuf; + +export void Test5(vector Input) { + using namespace dx::linalg; + + RWBuf.Store >(0, Input); + + // PREVIEW CHECK TODO: + // CHECK: Something about an error due to illegal conversions + VectorAccumulate(Input, 
RWBuf, 0); +} diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl index baa3a07a5b..b091bd2ac5 100644 --- a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject-in-buffer.hlsl @@ -1,4 +1,4 @@ // RUN: %dxc -T lib_6_9 %s -verify -// expected-error@+1{{'dx::HitObject' is an object and cannot be used as a type parameter}} +// expected-error@+1{{object 'dx::HitObject' is not allowed in structured buffers}} RWStructuredBuffer InvalidBuffer; diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_accessors.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_accessors.hlsl new file mode 100644 index 0000000000..05aa790ad4 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_accessors.hlsl @@ -0,0 +1,263 @@ +// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST +// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL + +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetHitKind +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetHitKind 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetHitKind 'unsigned int ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 366 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetInstanceID +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetInstanceID 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetInstanceID 'unsigned int ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 367 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetInstanceIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetInstanceIndex 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetInstanceIndex 'unsigned int ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 368 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetObjectRayDirection +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetObjectRayDirection 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetObjectRayDirection 'vector ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'vector':'vector' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 369 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetObjectRayOrigin +// AST-NEXT: | | | 
|-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetObjectRayOrigin 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetObjectRayOrigin 'vector ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'vector':'vector' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 370 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetObjectToWorld3x4 +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetObjectToWorld3x4 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetObjectToWorld3x4 'matrix ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'matrix':'matrix' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 371 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetObjectToWorld4x3 +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetObjectToWorld4x3 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetObjectToWorld4x3 'matrix ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'matrix':'matrix' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 372 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetPrimitiveIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetPrimitiveIndex 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetPrimitiveIndex 'unsigned int ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 373 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetRayFlags +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetRayFlags 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetRayFlags 'unsigned int ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 374 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetRayTCurrent +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetRayTCurrent 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetRayTCurrent 'float ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'float' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 375 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetRayTMin +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | 
|-CXXMethodDecl {{[^ ]+}} <> implicit GetRayTMin 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetRayTMin 'float ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'float' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 376 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetShaderTableIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetShaderTableIndex 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetShaderTableIndex 'unsigned int ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 377 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetWorldRayDirection +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetWorldRayDirection 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetWorldRayDirection 'vector ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'vector':'vector' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 378 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetWorldRayOrigin +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetWorldRayOrigin 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetWorldRayOrigin 'vector ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'vector':'vector' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 379 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetWorldToObject3x4 +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetWorldToObject3x4 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetWorldToObject3x4 'matrix ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'matrix':'matrix' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 380 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetWorldToObject4x3 +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetWorldToObject4x3 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetWorldToObject4x3 'matrix ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'matrix':'matrix' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 381 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> IsHit +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit IsHit 
'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used IsHit 'bool ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'bool' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 383 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> IsMiss +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit IsMiss 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used IsMiss 'bool ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'bool' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 384 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> IsNop +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit IsNop 'TResult () const' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used IsNop 'bool ()' extern +// AST-NEXT: | | | |-TemplateArgument type 'bool' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 385 +// AST-NEXT: | | | |-ConstAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> LoadLocalRootTableConstant +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRootConstantOffsetInBytes +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit LoadLocalRootTableConstant 'TResult (TRootConstantOffsetInBytes) const' +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> RootConstantOffsetInBytes 'TRootConstantOffsetInBytes' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used LoadLocalRootTableConstant 'unsigned int (unsigned int)' extern +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> LoadLocalRootTableConstant 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 386 +// AST-NEXT: | | | |-PureAttr {{[^ ]+}} <> Implicit +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" +// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> SetShaderTableIndex +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult +// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TRecordIndex +// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit SetShaderTableIndex 'TResult (TRecordIndex) const' +// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> RecordIndex 'TRecordIndex' +// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used SetShaderTableIndex 'void (unsigned int)' extern +// AST-NEXT: | | | |-TemplateArgument type 'void' +// AST-NEXT: | | | |-TemplateArgument type 'unsigned int' +// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> SetShaderTableIndex 'unsigned int' +// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 388 +// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 "" + +// FCGL: define void @"\01?main@@YAXXZ"() #0 { +// FCGL: %{{[^ ]+}} = call %dx.types.HitObject* @"dx.hl.op..%dx.types.HitObject* (i32, %dx.types.HitObject*)"(i32 358, %dx.types.HitObject* %[[HIT:[^ ]+]]) +// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, i32)"(i32 388, 
%dx.types.HitObject* %[[HIT]], i32 1) +// FCGL: %{{[^ ]+}} = call i1 @"dx.hl.op.rn.i1 (i32, %dx.types.HitObject*)"(i32 383, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i1 @"dx.hl.op.rn.i1 (i32, %dx.types.HitObject*)"(i32 384, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i1 @"dx.hl.op.rn.i1 (i32, %dx.types.HitObject*)"(i32 385, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 365, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 366, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 368, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 367, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 373, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 377, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.ro.i32 (i32, %dx.types.HitObject*, i32)"(i32 386, %dx.types.HitObject* %[[HIT]], i32 40) +// FCGL: %{{[^ ]+}} = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 379, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 378, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 370, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call <3 x float> @"dx.hl.op.rn.<3 x float> (i32, %dx.types.HitObject*)"(i32 369, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call %class.matrix.float.3.4 @"dx.hl.op.rn.%class.matrix.float.3.4 (i32, %dx.types.HitObject*)"(i32 371, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call %class.matrix.float.4.3 @"dx.hl.op.rn.%class.matrix.float.4.3 (i32, %dx.types.HitObject*)"(i32 372, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call %class.matrix.float.3.4 @"dx.hl.op.rn.%class.matrix.float.3.4 (i32, %dx.types.HitObject*)"(i32 380, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call %class.matrix.float.4.3 @"dx.hl.op.rn.%class.matrix.float.4.3 (i32, %dx.types.HitObject*)"(i32 381, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call i32 @"dx.hl.op.rn.i32 (i32, %dx.types.HitObject*)"(i32 374, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call float @"dx.hl.op.rn.float (i32, %dx.types.HitObject*)"(i32 376, %dx.types.HitObject* %[[HIT]]) +// FCGL: %{{[^ ]+}} = call float @"dx.hl.op.rn.float (i32, %dx.types.HitObject*)"(i32 375, %dx.types.HitObject* %[[HIT]]) +// FCGL: ret void + +RWByteAddressBuffer outbuf; + +template +float hashM(in matrix mat) { + float h = 0.f; + for (int i = 0; i < M; ++i) + for (int j = 0; j < N; ++j) + h += mat[i][j]; + return h; +} + +[shader("raygeneration")] +void main() { + dx::HitObject hit; + int isum = 0; + float fsum = 0.0f; + vector vsum = 0; + + ///// Setters + hit.SetShaderTableIndex(1); + + ///// Getters + + // i1 accessors + isum += hit.IsHit(); + isum += hit.IsMiss(); + isum += hit.IsNop(); + + // i32 accessors + isum += hit.GetGeometryIndex(); + isum += hit.GetHitKind(); + isum += hit.GetInstanceIndex(); + isum += hit.GetInstanceID(); + isum += hit.GetPrimitiveIndex(); + isum += hit.GetShaderTableIndex(); + isum += hit.LoadLocalRootTableConstant(40); + + // float3 accessors + vsum += 
hit.GetWorldRayOrigin();
+  vsum += hit.GetWorldRayDirection();
+  vsum += hit.GetObjectRayOrigin();
+  vsum += hit.GetObjectRayDirection();
+  fsum += vsum[0] + vsum[1] + vsum[2];
+
+  // matrix accessors
+  fsum += hashM<3, 4>(hit.GetObjectToWorld3x4());
+  fsum += hashM<4, 3>(hit.GetObjectToWorld4x3());
+  fsum += hashM<3, 4>(hit.GetWorldToObject3x4());
+  fsum += hashM<4, 3>(hit.GetWorldToObject4x3());
+
+  // f32 accessors
+  isum += hit.GetRayFlags();
+  fsum += hit.GetRayTMin();
+  fsum += hit.GetRayTCurrent();
+
+  outbuf.Store(0, fsum);
+  outbuf.Store(4, isum);
+}
diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes.hlsl
new file mode 100644
index 0000000000..609d94f291
--- /dev/null
+++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes.hlsl
@@ -0,0 +1,34 @@
+// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST
+// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL
+
+
+// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> GetAttributes
+// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult
+// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TAttributes
+// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit GetAttributes 'TResult (TAttributes &) const'
+// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> Attributes 'TAttributes &'
+// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used GetAttributes 'void (CustomAttrs &)' extern
+// AST-NEXT: | | | |-TemplateArgument type 'void'
+// AST-NEXT: | | | |-TemplateArgument type 'CustomAttrs'
+// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> GetAttributes 'CustomAttrs &&__restrict'
+// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 364
+// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 ""
+
+// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %struct.CustomAttrs*)"(i32 364, %dx.types.HitObject* %{{[^ ]+}}, %struct.CustomAttrs* %{{[^ ]+}})
+
+RWByteAddressBuffer outbuf;
+
+struct
+CustomAttrs {
+  float4 v;
+  int y;
+};
+
+[shader("raygeneration")]
+void main() {
+  dx::HitObject hit;
+  CustomAttrs attrs;
+  hit.GetAttributes(attrs);
+  float sum = attrs.v.x + attrs.v.y + attrs.v.z + attrs.v.w + attrs.y;
+  outbuf.Store(0, sum);
+}
diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_longvec.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_longvec.hlsl
new file mode 100644
index 0000000000..97bb81a7cb
--- /dev/null
+++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_longvec.hlsl
@@ -0,0 +1,15 @@
+// RUN: %dxc -T lib_6_9 -E main %s -verify
+
+struct
+CustomAttrs {
+  vector v;
+  int y;
+};
+
+[shader("raygeneration")]
+void main() {
+  dx::HitObject hit;
+  // expected-error@+2{{vectors of over 4 elements in attributes are not supported}}
+  CustomAttrs attrs;
+  hit.GetAttributes(attrs);
+}
diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_udt.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_udt.hlsl
new file mode 100644
index 0000000000..f8935676c5
--- /dev/null
+++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_attributes_invalid_udt.hlsl
@@ -0,0 +1,16 @@
+// RUN: %dxc -T lib_6_9 -E main %s -verify
+
+struct
+CustomAttrs {
+  vector v;
+  RWStructuredBuffer buf;
+};
+
+[shader("raygeneration")]
+void main() {
+  dx::HitObject hit;
+  CustomAttrs attrs;
+  hit.GetAttributes(attrs);
+  // expected-error@-1{{vectors of over 4 elements in attributes are not supported}}
+  // expected-error@-2{{attributes type must be a user-defined type composed of only numeric types}}
+}
diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl
new file mode 100644
index 0000000000..e4a13d8a62
--- /dev/null
+++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_fromrayquery.hlsl
@@ -0,0 +1,72 @@
+// RUN: %dxc -T lib_6_9 -E main %s -fcgl | FileCheck %s --check-prefix FCGL
+// RUN: %dxc -T lib_6_9 -E main %s -ast-dump-implicit | FileCheck %s --check-prefix AST
+
+// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> FromRayQuery
+// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult
+// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class Trq
+// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit FromRayQuery 'TResult (Trq) const' static
+// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> rq 'Trq'
+// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used FromRayQuery 'dx::HitObject (RayQuery)' static
+// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject'
+// AST-NEXT: | | | |-TemplateArgument type 'RayQuery':'RayQuery<5, 0>'
+// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> rq 'RayQuery':'RayQuery<5, 0>'
+// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 363
+// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 ""
+
+// AST: | | |-FunctionTemplateDecl {{[^ ]+}} <> FromRayQuery
+// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TResult
+// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class Trq
+// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class THitKind
+// AST-NEXT: | | | |-TemplateTypeParmDecl {{[^ ]+}} <> class TAttributes
+// AST-NEXT: | | | |-CXXMethodDecl {{[^ ]+}} <> implicit FromRayQuery 'TResult (Trq, THitKind, TAttributes) const' static
+// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> rq 'Trq'
+// AST-NEXT: | | | | |-ParmVarDecl {{[^ ]+}} <> HitKind 'THitKind'
+// AST-NEXT: | | | | `-ParmVarDecl {{[^ ]+}} <> Attributes 'TAttributes'
+// AST-NEXT: | | | `-CXXMethodDecl {{[^ ]+}} <> used FromRayQuery 'dx::HitObject (RayQuery, unsigned int, CustomAttrs)' static
+// AST-NEXT: | | | |-TemplateArgument type 'dx::HitObject'
+// AST-NEXT: | | | |-TemplateArgument type 'RayQuery':'RayQuery<5, 0>'
+// AST-NEXT: | | | |-TemplateArgument type 'unsigned int'
+// AST-NEXT: | | | |-TemplateArgument type 'CustomAttrs'
+// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> rq 'RayQuery':'RayQuery<5, 0>'
+// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> HitKind 'unsigned int'
+// AST-NEXT: | | | |-ParmVarDecl {{[^ ]+}} <> Attributes 'CustomAttrs'
+// AST-NEXT: | | | |-HLSLIntrinsicAttr {{[^ ]+}} <> Implicit "op" "" 363
+// AST-NEXT: | | | `-AvailabilityAttr {{[^ ]+}} <> Implicit 6.9 0 0 ""
+
+// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %\22class.RayQuery<5, 0>\22*)"(i32 363, %dx.types.HitObject* %[[HITPTR0:[^ ]+]], %"class.RayQuery<5, 0>"* %[[RQ:[^ ]+]])
+// FCGL-NEXT: call void @"\01?Use@@YAXVHitObject@dx@@@Z"(%dx.types.HitObject* %[[HITPTR0]])
+// FCGL: call void @"dx.hl.op..void (i32, %dx.types.HitObject*, %\22class.RayQuery<5, 0>\22*, i32, %struct.CustomAttrs*)"(i32 363, %dx.types.HitObject* %[[HITPTR1:[^ ]+]], %"class.RayQuery<5, 0>"* %[[RQ]], i32 16, %struct.CustomAttrs* %{{[^ ]+}})
+// FCGL-NEXT: call void @"\01?Use@@YAXVHitObject@dx@@@Z"(%dx.types.HitObject* %[[HITPTR1]])
+
+RaytracingAccelerationStructure RTAS;
+RWStructuredBuffer UAV : register(u0);
+
+RayDesc MakeRayDesc() {
+  RayDesc desc;
+  desc.Origin = float3(0, 0, 0);
+  desc.Direction = float3(1, 0, 0);
+  desc.TMin = 0.0f;
+  desc.TMax = 9999.0;
+  return desc;
+}
+
+struct CustomAttrs {
+  float x;
+  float y;
+};
+
+void Use(in dx::HitObject hit) {
+  dx::MaybeReorderThread(hit);
+}
+
+[shader("raygeneration")]
+void main() {
+  RayQuery q;
+  RayDesc ray = MakeRayDesc();
+  q.TraceRayInline(RTAS, RAY_FLAG_NONE, 0xFF, ray);
+
+  Use(dx::HitObject::FromRayQuery(q));
+
+  CustomAttrs attrs = {1.f, 2.f};
+  Use(dx::HitObject::FromRayQuery(q, 16, attrs));
+}
diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload.hlsl
new file mode 100644
index 0000000000..f4781bc796
--- /dev/null
+++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload.hlsl
@@ -0,0 +1,27 @@
+// RUN: %dxc -T lib_6_9 %s -D TEST_NUM=0 %s -verify
+// RUN: %dxc -T lib_6_9 %s -D TEST_NUM=1 %s -verify
+
+RaytracingAccelerationStructure scene : register(t0);
+
+struct Payload
+{
+  int a : read (caller, closesthit, miss) : write(caller, closesthit, miss);
+};
+
+struct Attribs
+{
+  float2 barys;
+};
+
+[shader("raygeneration")]
+void RayGen()
+{
+// expected-error@+1{{type 'Payload' used as payload requires that it is annotated with the [raypayload] attribute}}
+  Payload payload_in_rg;
+  RayDesc ray;
+#if TEST_NUM == 0
+  dx::HitObject::TraceRay( scene, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload_in_rg );
+#else
+  dx::HitObject::Invoke( dx::HitObject(), payload_in_rg );
+#endif
+}
\ No newline at end of file
diff --git a/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload_udt.hlsl b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload_udt.hlsl
new file mode 100644
index 0000000000..ee4ff8c020
--- /dev/null
+++ b/tools/clang/test/SemaHLSL/hlsl/objects/HitObject/hitobject_traceinvoke_payload_udt.hlsl
@@ -0,0 +1,31 @@
+// RUN: %dxc -T lib_6_9 %s -verify
+
+struct
+[raypayload]
+Payload
+{
+  int a : read(closesthit, miss) : write(anyhit);
+  dx::HitObject hit;
+};
+
+struct
+[raypayload]
+PayloadLV
+{
+  int a : read(closesthit, miss) : write(anyhit);
+  vector b : read(closesthit, miss) : write(anyhit);
+};
+
+[shader("raygeneration")]
+void RayGen()
+{
+  // expected-error@+3{{payload parameter 'payload_in_rg' must be a user-defined type composed of only numeric types}}
+  // expected-error@+2{{object 'dx::HitObject' is not allowed in payload parameters}}
+  // expected-note@8{{'dx::HitObject' field declared here}}
+  Payload payload_in_rg;
+  dx::HitObject::Invoke( dx::HitObject(), payload_in_rg );
+
+  // expected-error@+1{{vectors of over 4 elements in payload parameters are not supported}}
+  PayloadLV payload_with_lv;
+  dx::HitObject::Invoke( dx::HitObject(), payload_with_lv );
+}
\ No newline at end of file
diff --git a/tools/clang/test/SemaHLSL/hlsl/semantics/ExtendedCommandInformation/WrongShaderModel.hlsl b/tools/clang/test/SemaHLSL/hlsl/semantics/ExtendedCommandInformation/WrongShaderModel.hlsl
index 667e1f4579..4bddf37acd 100644
--- a/tools/clang/test/SemaHLSL/hlsl/semantics/ExtendedCommandInformation/WrongShaderModel.hlsl
+++ b/tools/clang/test/SemaHLSL/hlsl/semantics/ExtendedCommandInformation/WrongShaderModel.hlsl
@@ -1,7 +1,5 @@
 // TODO: use -verify instead of FileCheck after fix
 // https://github.com/microsoft/DirectXShaderCompiler/issues/5768
-// -select-validator internal used to avoid downlevel validator testing
-// incompatibility with shader model 6.7.
-// RUN: not %dxc -E main -T vs_6_7 -select-validator internal %s 2>&1 | FileCheck %s --check-prefix=SM67
+// RUN: not %dxc -E main -T vs_6_7 %s 2>&1 | FileCheck %s --check-prefix=SM67
 // SM67:invalid semantic 'SV_StartVertexLocation' for vs 6.7
 // SM67:invalid semantic 'SV_StartInstanceLocation' for vs 6.7
diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-hs.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-hs.hlsl
new file mode 100644
index 0000000000..3a4457bd5f
--- /dev/null
+++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-hs.hlsl
@@ -0,0 +1,32 @@
+// RUN: %dxc -T hs_6_9 -verify %s
+
+struct HsConstantData {
+  float Edges[3] : SV_TessFactor;
+  dx::HitObject hit;
+};
+
+struct LongVec {
+  float4 f;
+  dx::HitObject hit;
+};
+
+HsConstantData
+PatchConstantFunction(
+  // expected-error@-1{{object 'dx::HitObject' is not allowed in patch constant function return type}}
+  // expected-note@5{{'dx::HitObject' field declared here}}
+  dx::HitObject hit : V,
+  // expected-error@-1{{object 'dx::HitObject' is not allowed in patch constant function parameters}}
+  LongVec lv : L)
+  // expected-error@-1{{object 'dx::HitObject' is not allowed in patch constant function parameters}}
+  // expected-note@10{{'dx::HitObject' field declared here}}
+{
+  HsConstantData empty;
+  return empty;
+}
+
+[domain("tri")]
+[outputtopology("triangle_cw")]
+[outputcontrolpoints(32)]
+[patchconstantfunc("PatchConstantFunction")]
+void main() {
+}
diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-struct.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-struct.hlsl
new file mode 100644
index 0000000000..c852d17a1a
--- /dev/null
+++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-struct.hlsl
@@ -0,0 +1,344 @@
+// RUN: %dxc -T lib_6_9 -DTYPE=HitStruct -verify %s
+// RUN: %dxc -T lib_6_9 -DTYPE=HitStructSub -verify %s
+
+
+#define PASTE_(x,y) x##y
+#define PASTE(x,y) PASTE_(x,y)
+
+#ifndef TYPE
+#define TYPE HitTpl
+#endif
+
+// Add tests for base types and instantiated template classes with HitObjects
+
+struct HitStruct {
+  float4 f;
+  dx::HitObject hit;
+};
+
+struct HitStructSub : HitStruct {
+  int3 is;
+};
+
+template
+struct HitTpl {
+  float4 f;
+  T val;
+};
+
+TYPE global_type;
+// expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}}
+// expected-note@16{{'dx::HitObject' field declared here}}
+dx::HitObject global_hit;
+// expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}}
+dx::HitObject global_hit_arr[10];
+// expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}}
+
+static TYPE static_gv;
+// expected-error@-1{{object 'dx::HitObject' is not allowed in global variables}}
+// expected-note@16{{'dx::HitObject' field declared here}}
+
+cbuffer BadBuffy {
+  dx::HitObject cb_hit;
+  // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}}
+  dx::HitObject cb_hit_arr[10];
+  // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}}
+};
+
+tbuffer BadTuffy {
+  dx::HitObject tb_vec;
+  // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}}
+  dx::HitObject tb_vec_arr[10];
+  // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}}
+  TYPE
tb_vec_rec; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + // expected-note@16{{'dx::HitObject' field declared here}} + TYPE tb_vec_rec_arr[10]; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + // expected-note@16{{'dx::HitObject' field declared here}} +}; + +StructuredBuffer struct_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in structured buffers}} +// expected-note@16{{'dx::HitObject' field declared here}} +RWStructuredBuffer rw_struct_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in structured buffers}} +// expected-note@16{{'dx::HitObject' field declared here}} +ConstantBuffer const_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in ConstantBuffers or TextureBuffers}} +// expected-note@16{{'dx::HitObject' field declared here}} +TextureBuffer tex_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in ConstantBuffers or TextureBuffers}} +// expected-note@16{{'dx::HitObject' field declared here}} + +ByteAddressBuffer bab; +RWByteAddressBuffer rw_bab; + +[Shader("raygeneration")] +void main() +{ + bab.Load(0); + // expected-error@-1{{object 'dx::HitObject' is not allowed in builtin template parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + // expected-error@-3{{Explicit template arguments on intrinsic Load must be a single numeric type}} + rw_bab.Load(0); + // expected-error@-1{{object 'dx::HitObject' is not allowed in builtin template parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + // expected-error@-3{{Explicit template arguments on intrinsic Load must be a single numeric type}} + TYPE val; + rw_bab.Store(0, val); + // expected-error@-1{{object 'dx::HitObject' is not allowed in builtin template parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + // expected-error@-3{{Explicit template arguments on intrinsic Store must be a single numeric type}} +} + +[shader("pixel")] +TYPE ps_main( +// expected-error@-1{{object 'dx::HitObject' is not allowed in entry function return type}} +// expected-note@16{{'dx::HitObject' field declared here}} + TYPE vec : V) : SV_Target { + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + return vec; +} + +[shader("vertex")] +TYPE vs_main( +// expected-error@-1{{object 'dx::HitObject' is not allowed in entry function return type}} +// expected-note@16{{'dx::HitObject' field declared here}} + TYPE parm : P) : SV_Target { + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + parm.f = 0; + return parm; +} + + +[shader("geometry")] +[maxvertexcount(3)] +void gs_point( + line TYPE e, + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + inout PointStream OutputStream0) + // expected-error@-1{{object 'dx::HitObject' is not allowed in geometry streams}} + // expected-note@16{{'dx::HitObject' field declared here}} +{} + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line( + line TYPE a, + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + inout LineStream OutputStream0) + // expected-error@-1{{object 'dx::HitObject' is not allowed 
in geometry streams}} + // expected-note@16{{'dx::HitObject' field declared here}} +{} + + +[shader("geometry")] +[maxvertexcount(12)] +void gs_tri( + triangle TYPE a, + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + inout TriangleStream OutputStream0) + // expected-error@-1{{object 'dx::HitObject' is not allowed in geometry streams}} + // expected-note@16{{'dx::HitObject' field declared here}} +{} + +[shader("domain")] +[domain("tri")] +void ds_main( + OutputPatch TrianglePatch) + // expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} + // expected-note@16{{'dx::HitObject' field declared here}} +{} + +void patch_const( + InputPatch inpatch, + // expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} + // expected-note@16{{'dx::HitObject' field declared here}} + OutputPatch outpatch) + // expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} + // expected-note@16{{'dx::HitObject' field declared here}} +{} + +[shader("hull")] +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("patch_const")] +void hs_main(InputPatch TrianglePatch) {} +// expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} +// expected-note@16{{'dx::HitObject' field declared here}} + +RaytracingAccelerationStructure RTAS; + +struct [raypayload] DXRHitStruct { + float4 f : write(closesthit) : read(caller); + TYPE hit : write(closesthit) : read(caller); +}; + +struct [raypayload] DXRHitStructSub : DXRHitStruct { + int3 is : write(closesthit) : read(caller); +}; + +template +struct [raypayload] DXRHitTpl { + float4 f : write(closesthit) : read(caller); + T hit : write(closesthit) : read(caller); +}; + +#define RTTYPE PASTE(DXR,TYPE) + + +TYPE userFunc(TYPE arg) { + return arg; +} + +[shader("raygeneration")] +void raygen() { + RTTYPE p = (RTTYPE)0; + RayDesc ray = (RayDesc)0; + TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} + CallShader(0, p); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} + TYPE val; + TYPE res = userFunc(val); +} + +[shader("closesthit")] +void closesthit( + inout RTTYPE payload, + // expected-error@-1{{payload parameter 'payload' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + in RTTYPE attribs) { + // expected-error@-1{{attributes parameter 'attribs' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} + CallShader(0, payload); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} +} + +[shader("anyhit")] +void 
AnyHit( + inout RTTYPE payload, + // expected-error@-1{{payload parameter 'payload' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + in RTTYPE attribs) + // expected-error@-1{{attributes parameter 'attribs' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} +{ +} + +[shader("miss")] +void Miss( + inout RTTYPE payload){ + // expected-error@-1{{payload parameter 'payload' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} + CallShader(0, payload); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} +} + +[shader("intersection")] +void Intersection() { + float hitT = RayTCurrent(); + RTTYPE attr = (RTTYPE)0; + bool bReported = ReportHit(hitT, 0, attr); + // expected-error@-1{{object 'dx::HitObject' is not allowed in attributes}} + // expected-note@16{{'dx::HitObject' field declared here}} +} + +[shader("callable")] +void callable1( + inout RTTYPE p) { + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@16{{'dx::HitObject' field declared here}} + // expected-error@-3{{callable parameter 'p' must be a user-defined type composed of only numeric types}} + CallShader(0, p); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} +} + +static groupshared TYPE gs_var; +// expected-error@-1{{object 'dx::HitObject' is not allowed in groupshared variables}} +// expected-note@16{{'dx::HitObject' field declared here}} + +[shader("amplification")] +[numthreads(1,1,1)] +void Amp() { + TYPE as_pld; + DispatchMesh(1,1,1,as_pld); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@16{{'dx::HitObject' field declared here}} +} + +struct NodeHitStruct { + uint3 grid : SV_DispatchGrid; + TYPE hit; +}; + +struct NodeHitStructSub : NodeHitStruct { + int3 is; +}; + +template +struct NodeHitTpl { + uint3 grid : SV_DispatchGrid; + T hit; +}; + +#define NTYPE PASTE(Node,TYPE) + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxDispatchGrid(8, 1, 1)] +void broadcast( +// expected-error@-1{{Broadcasting node shader 'broadcast' with NodeMaxDispatchGrid attribute must declare an input record containing a field with SV_DispatchGrid semantic}} + DispatchNodeInputRecord input, + // expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} + // expected-note@16{{'dx::HitObject' field declared here}} + NodeOutput output) + // expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} + // expected-note@16{{'dx::HitObject' field declared here}} +{ + ThreadNodeOutputRecords touts; + // expected-error@-1{{object 
'dx::HitObject' is not allowed in node records}} + // expected-note@16{{'dx::HitObject' field declared here}} + GroupNodeOutputRecords gouts; + // expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} + // expected-note@16{{'dx::HitObject' field declared here}} +} + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(8,1,1)] +void coalesce(GroupNodeInputRecords input) {} +// expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} +// expected-note@16{{'dx::HitObject' field declared here}} + +[Shader("node")] +[NodeLaunch("thread")] +void threader(ThreadNodeInputRecord input) {} +// expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} +// expected-note@16{{'dx::HitObject' field declared here}} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-templated.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-templated.hlsl new file mode 100644 index 0000000000..c2303a8608 --- /dev/null +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-hitobject-decls-templated.hlsl @@ -0,0 +1,340 @@ +// RUN: %dxc -T lib_6_9 -verify %s + + +#define PASTE_(x,y) x##y +#define PASTE(x,y) PASTE_(x,y) + +#define TYPE HitTpl + +// Add tests for base types and instantiated template classes with HitObjects + +struct HitStruct { + float4 f; + dx::HitObject hit; +}; + +struct HitStructSub : HitStruct { + int3 is; +}; + +template +struct HitTpl { + float4 f; + T val; +}; + +RaytracingAccelerationStructure RTAS; + +struct [raypayload] DXRHitStruct { + float4 f : write(closesthit) : read(caller); + TYPE hit : write(closesthit) : read(caller); +}; + +struct [raypayload] DXRHitStructSub : DXRHitStruct { + int3 is : write(closesthit) : read(caller); +}; + +template +struct [raypayload] DXRHitTpl { + float4 f : write(closesthit) : read(caller); + T hit : write(closesthit) : read(caller); +}; + +struct NodeHitStruct { + uint3 grid : SV_DispatchGrid; + TYPE hit; +}; + +struct NodeHitStructSub : NodeHitStruct { + int3 is; +}; + +template +struct NodeHitTpl { + uint3 grid : SV_DispatchGrid; + T hit; +}; + +TYPE global_type; +// expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} +// expected-note@23{{'dx::HitObject' field declared here}} +dx::HitObject global_hit; +// expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} +dx::HitObject global_hit_arr[10]; +// expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + +static TYPE static_gv; +// expected-error@-1{{object 'dx::HitObject' is not allowed in global variables}} +// expected-note@23{{'dx::HitObject' field declared here}} + +cbuffer BadBuffy { + dx::HitObject cb_hit; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + dx::HitObject cb_hit_arr[10]; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} +}; + +tbuffer BadTuffy { + dx::HitObject tb_vec; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + dx::HitObject tb_vec_arr[10]; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + TYPE tb_vec_rec; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + // expected-note@23{{'dx::HitObject' field declared here}} + TYPE tb_vec_rec_arr[10]; + // expected-error@-1{{object 'dx::HitObject' is not allowed in cbuffers or tbuffers}} + // expected-note@23{{'dx::HitObject' field declared here}} +}; + 
+StructuredBuffer struct_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in structured buffers}} +// expected-note@23{{'dx::HitObject' field declared here}} +RWStructuredBuffer rw_struct_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in structured buffers}} +// expected-note@23{{'dx::HitObject' field declared here}} +ConstantBuffer const_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in ConstantBuffers or TextureBuffers}} +// expected-note@23{{'dx::HitObject' field declared here}} +TextureBuffer tex_buf; +// expected-error@-1{{object 'dx::HitObject' is not allowed in ConstantBuffers or TextureBuffers}} +// expected-note@23{{'dx::HitObject' field declared here}} + +ByteAddressBuffer bab; +RWByteAddressBuffer rw_bab; + +[Shader("raygeneration")] +void main() +{ + bab.Load(0); + // expected-error@-1{{object 'dx::HitObject' is not allowed in builtin template parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + // expected-error@-3{{Explicit template arguments on intrinsic Load must be a single numeric type}} + rw_bab.Load(0); + // expected-error@-1{{object 'dx::HitObject' is not allowed in builtin template parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + // expected-error@-3{{Explicit template arguments on intrinsic Load must be a single numeric type}} + TYPE val; + rw_bab.Store(0, val); + // expected-error@-1{{object 'dx::HitObject' is not allowed in builtin template parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + // expected-error@-3{{Explicit template arguments on intrinsic Store must be a single numeric type}} +} + +[shader("pixel")] +TYPE ps_main( +// expected-error@-1{{object 'dx::HitObject' is not allowed in entry function return type}} +// expected-note@23{{'dx::HitObject' field declared here}} + TYPE vec : V) : SV_Target { + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + return vec; +} + +[shader("vertex")] +TYPE vs_main( +// expected-error@-1{{object 'dx::HitObject' is not allowed in entry function return type}} +// expected-note@23{{'dx::HitObject' field declared here}} + TYPE parm : P) : SV_Target { + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + parm.f = 0; + return parm; +} + + +[shader("geometry")] +[maxvertexcount(3)] +void gs_point( + line TYPE e, + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + inout PointStream OutputStream0) + // expected-error@-1{{object 'dx::HitObject' is not allowed in geometry streams}} + // expected-note@23{{'dx::HitObject' field declared here}} +{} + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line( + line TYPE a, + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + inout LineStream OutputStream0) + // expected-error@-1{{object 'dx::HitObject' is not allowed in geometry streams}} + // expected-note@23{{'dx::HitObject' field declared here}} +{} + + +[shader("geometry")] +[maxvertexcount(12)] +void gs_line( + line TYPE a, + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@23{{'dx::HitObject' field declared here}} + inout TriangleStream 
OutputStream0) + // expected-error@-1{{object 'dx::HitObject' is not allowed in geometry streams}} + // expected-note@23{{'dx::HitObject' field declared here}} +{} + +[shader("domain")] +[domain("tri")] +void ds_main( + OutputPatch TrianglePatch) + // expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} + // expected-note@23{{'dx::HitObject' field declared here}} +{} + +void patch_const( + InputPatch inpatch, + // expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} + // expected-note@23{{'dx::HitObject' field declared here}} + OutputPatch outpatch) + // expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} + // expected-note@23{{'dx::HitObject' field declared here}} +{} + +[shader("hull")] +[domain("tri")] +[outputtopology("triangle_cw")] +[outputcontrolpoints(32)] +[patchconstantfunc("patch_const")] +void hs_main(InputPatch TrianglePatch) {} +// expected-error@-1{{object 'dx::HitObject' is not allowed in tessellation patches}} +// expected-note@23{{'dx::HitObject' field declared here}} + +#define RTTYPE PASTE(DXR,TYPE) + +TYPE userFunc(TYPE arg) { + return arg; +} + +[shader("raygeneration")] +void raygen() { + RTTYPE p = (RTTYPE)0; + RayDesc ray = (RayDesc)0; + TraceRay(RTAS, RAY_FLAG_NONE, 0, 0, 1, 0, ray, p); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@40{{'dx::HitObject' field declared here}} + CallShader(0, p); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@40{{'dx::HitObject' field declared here}} + TYPE val; + TYPE res = userFunc(val); +} + +[shader("closesthit")] +void closesthit( + inout RTTYPE payload, + // expected-error@-1{{payload parameter 'payload' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@40{{'dx::HitObject' field declared here}} + in RTTYPE attribs) { + // expected-error@-1{{attributes parameter 'attribs' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@40{{'dx::HitObject' field declared here}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@40{{'dx::HitObject' field declared here}} + CallShader(0, payload); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@40{{'dx::HitObject' field declared here}} +} + +[shader("anyhit")] +void AnyHit( + inout RTTYPE payload, + // expected-error@-1{{payload parameter 'payload' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@40{{'dx::HitObject' field declared here}} + in RTTYPE attribs) + // expected-error@-1{{attributes parameter 'attribs' must be a user-defined type composed of only numeric types}} + // expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@40{{'dx::HitObject' field declared here}} +{ +} + +[shader("miss")] +void Miss( + inout RTTYPE payload){ + // expected-error@-1{{payload parameter 'payload' must be a user-defined type composed of only numeric types}} + // 
expected-error@-2{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@40{{'dx::HitObject' field declared here}} + RayDesc ray; + TraceRay( RTAS, RAY_FLAG_NONE, 0xff, 0, 1, 0, ray, payload ); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@40{{'dx::HitObject' field declared here}} + CallShader(0, payload); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@40{{'dx::HitObject' field declared here}} +} + +[shader("intersection")] +void Intersection() { + float hitT = RayTCurrent(); + RTTYPE attr = (RTTYPE)0; + bool bReported = ReportHit(hitT, 0, attr); + // expected-error@-1{{object 'dx::HitObject' is not allowed in attributes}} + // expected-note@40{{'dx::HitObject' field declared here}} +} + +[shader("callable")] +void callable1( + inout RTTYPE p) { + // expected-error@-1{{object 'dx::HitObject' is not allowed in entry function parameters}} + // expected-note@40{{'dx::HitObject' field declared here}} + // expected-error@-3{{callable parameter 'p' must be a user-defined type composed of only numeric types}} + CallShader(0, p); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@40{{'dx::HitObject' field declared here}} +} + +static groupshared TYPE gs_var; +// expected-error@-1{{object 'dx::HitObject' is not allowed in groupshared variables}} +// expected-note@23{{'dx::HitObject' field declared here}} + +[shader("amplification")] +[numthreads(1,1,1)] +void Amp() { + TYPE as_pld; + DispatchMesh(1,1,1,as_pld); + // expected-error@-1{{object 'dx::HitObject' is not allowed in user-defined struct parameter}} + // expected-note@23{{'dx::HitObject' field declared here}} +} + +#define NTYPE PASTE(Node,TYPE) + +[Shader("node")] +[NodeLaunch("broadcasting")] +[NumThreads(8,1,1)] +[NodeMaxDispatchGrid(8, 1, 1)] +void broadcast( +// expected-error@-1{{Broadcasting node shader 'broadcast' with NodeMaxDispatchGrid attribute must declare an input record containing a field with SV_DispatchGrid semantic}} + DispatchNodeInputRecord input, + // expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} + // expected-note@55{{'dx::HitObject' field declared here}} + NodeOutput output) + // expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} + // expected-note@23{{'dx::HitObject' field declared here}} +{ + ThreadNodeOutputRecords touts; + // expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} + // expected-note@23{{'dx::HitObject' field declared here}} + GroupNodeOutputRecords gouts; + // expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} + // expected-note@23{{'dx::HitObject' field declared here}} +} + +[Shader("node")] +[NodeLaunch("coalescing")] +[NumThreads(8,1,1)] +void coalesce(GroupNodeInputRecords input) {} +// expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} +// expected-note@23{{'dx::HitObject' field declared here}} + +[Shader("node")] +[NodeLaunch("thread")] +void threader(ThreadNodeInputRecord input) {} +// expected-error@-1{{object 'dx::HitObject' is not allowed in node records}} +// expected-note@23{{'dx::HitObject' field declared here}} diff --git a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl index 0604feeaec..96c5d4b5f4 100644 --- 
a/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/types/invalid-longvec-decls.hlsl @@ -146,7 +146,7 @@ void Miss(inout RTTYPE payload){ // expected-error{{vectors of over 4 elements i void Intersection() { float hitT = RayTCurrent(); RTTYPE attr = (RTTYPE)0; - bool bReported = ReportHit(hitT, 0, attr); // expected-error{{vectors of over 4 elements in user-defined struct parameter are not supported}} + bool bReported = ReportHit(hitT, 0, attr); // expected-error{{vectors of over 4 elements in attributes are not supported}} } [shader("callable")] diff --git a/tools/clang/test/SemaHLSL/hlsl/workgraph/invalid_node_record_type.hlsl b/tools/clang/test/SemaHLSL/hlsl/workgraph/invalid_node_record_type.hlsl index 40b820a1b4..de523d51d1 100644 --- a/tools/clang/test/SemaHLSL/hlsl/workgraph/invalid_node_record_type.hlsl +++ b/tools/clang/test/SemaHLSL/hlsl/workgraph/invalid_node_record_type.hlsl @@ -76,7 +76,7 @@ void node07(RWThreadNodeInputRecord input) // expected-error {{'f2x2' (aka [Shader("node")] [NodeLaunch("thread")] -void node08(ThreadNodeInputRecord input) // expected-error {{object 'SamplerState' may not appear in a node record}} +void node08(ThreadNodeInputRecord input) // expected-error {{object 'SamplerState' is not allowed in node records}} { } [Shader("node")] @@ -86,17 +86,17 @@ void node09(ThreadNodeInputRecord input) // expected-error {{'BAD [Shader("node")] [NodeLaunch("thread")] -void node10(RWThreadNodeInputRecord input) // expected-error {{object 'SamplerState' may not appear in a node record}} +void node10(RWThreadNodeInputRecord input) // expected-error {{object 'SamplerState' is not allowed in node records}} { } [Shader("node")] [NodeLaunch("thread")] -void node11(NodeOutput input) // expected-error {{object 'SamplerState' may not appear in a node record}} +void node11(NodeOutput input) // expected-error {{object 'SamplerState' is not allowed in node records}} { } [Shader("node")] [NodeLaunch("thread")] -void node12(NodeOutputArray output) // expected-error {{object 'SamplerState' may not appear in a node record}} +void node12(NodeOutputArray output) // expected-error {{object 'SamplerState' is not allowed in node records}} { } [Shader("node")] @@ -129,7 +129,7 @@ void node16() ThreadNodeOutputRecords outrec2; // expected-error {{'f2x2' (aka 'matrix') is not valid as a node record type - struct/class required}} - GroupNodeOutputRecords outrec3; // expected-error {{object 'SamplerState' may not appear in a node record}} + GroupNodeOutputRecords outrec3; // expected-error {{object 'SamplerState' is not allowed in node records}} ThreadNodeOutputRecords outrec4; // expected-error {{'SamplerState' is not valid as a node record type - struct/class required}} } @@ -151,10 +151,10 @@ void node17(ThreadNodeInputRecord > input) [Shader("node")] [NodeLaunch("thread")] -void node18(ThreadNodeInputRecord > input) // expected-error {{object 'SamplerState' may not appear in a node record}} +void node18(ThreadNodeInputRecord > input) // expected-error {{object 'SamplerState' is not allowed in node records}} { } [Shader("node")] [NodeLaunch("thread")] -void node19(RWThreadNodeInputRecord input) // expected-error {{object 'SamplerState' may not appear in a node record}} +void node19(RWThreadNodeInputRecord input) // expected-error {{object 'SamplerState' is not allowed in node records}} { } diff --git a/tools/clang/test/SemaHLSL/raytracing-entry-diags.hlsl b/tools/clang/test/SemaHLSL/raytracing-entry-diags.hlsl index e41c6a2f4f..8dfc927e11 
100644 --- a/tools/clang/test/SemaHLSL/raytracing-entry-diags.hlsl +++ b/tools/clang/test/SemaHLSL/raytracing-entry-diags.hlsl @@ -181,3 +181,24 @@ void callable7(inout MyPayload payload, float F) {} [shader("callable")] float callable8(inout MyPayload payload) {} // expected-error{{return type for 'callable' shaders must be void}} + +// expected-note@+1 6 {{forward declaration of 'Incomplete'}} +struct Incomplete; + +// expected-error@+3{{variable has incomplete type 'Incomplete'}} +// expected-error@+2{{variable has incomplete type '__restrict Incomplete'}} +[shader("anyhit")] +void anyhit_incomplete( inout Incomplete A1, Incomplete A2) { } + +// expected-error@+3{{variable has incomplete type 'Incomplete'}} +// expected-error@+2{{variable has incomplete type '__restrict Incomplete'}} +[shader("closesthit")] +void closesthit_incomplete( inout Incomplete payload, Incomplete attr ) {} + +// expected-error@+2{{variable has incomplete type '__restrict Incomplete'}} +[shader("miss")] +void miss_incomplete( inout Incomplete payload) { } + +// expected-error@+2{{variable has incomplete type '__restrict Incomplete'}} +[shader("callable")] +void callable_incomplete(inout Incomplete payload) {} diff --git a/tools/clang/test/SemaHLSL/raytracings.hlsl b/tools/clang/test/SemaHLSL/raytracings.hlsl index d3bc01fcd6..429037f22b 100644 --- a/tools/clang/test/SemaHLSL/raytracings.hlsl +++ b/tools/clang/test/SemaHLSL/raytracings.hlsl @@ -12,14 +12,14 @@ void run() { RAY_FLAG_CULL_OPAQUE + RAY_FLAG_CULL_NON_OPAQUE; - rayFlags += RAY_FLAG_INVALID; /* expected-note@? {{'RAY_FLAG_NONE' declared here}} expected-error {{use of undeclared identifier 'RAY_FLAG_INVALID'; did you mean 'RAY_FLAG_NONE'?}} */ + rayFlags += RAY_FLAG_INVALID; /* expected-error {{use of undeclared identifier 'RAY_FLAG_INVALID'; did you mean 'RAY_FLAG_NONE'?}} */ int intFlag = RAY_FLAG_CULL_OPAQUE; int hitKindFlag = HIT_KIND_TRIANGLE_FRONT_FACE + HIT_KIND_TRIANGLE_BACK_FACE; - hitKindFlag += HIT_KIND_INVALID; /* expected-note@? {{'HIT_KIND_NONE' declared here}} expected-error {{use of undeclared identifier 'HIT_KIND_INVALID'; did you mean 'HIT_KIND_NONE'?}} */ + hitKindFlag += HIT_KIND_INVALID; /* expected-error {{use of undeclared identifier 'HIT_KIND_INVALID'; did you mean 'HIT_KIND_NONE'?}} */ BuiltInTriangleIntersectionAttributes attr; diff --git a/tools/clang/test/SemaHLSL/sizeof-requires-complete-type.hlsl b/tools/clang/test/SemaHLSL/sizeof-requires-complete-type.hlsl new file mode 100644 index 0000000000..31d4898efe --- /dev/null +++ b/tools/clang/test/SemaHLSL/sizeof-requires-complete-type.hlsl @@ -0,0 +1,27 @@ +// RUN: %dxc -T lib_6_3 -verify %s + +struct Complete {}; + +struct Incomplete; // expected-note{{forward declaration of 'Incomplete'}} +template struct CompleteTemplate {}; + +void fn() { + uint s; + // Complete types are easy. They are complete before we get to the expression. + s = sizeof(Complete); // This works! + + // A type may be incomplete for several reasons. + + // It may be incomplete because there is only a forward declaration, which + // should produce an error since we can't materialize a definition. + s = sizeof(Incomplete); // expected-error{{invalid application of 'sizeof' to an incomplete type 'Incomplete'}} + + // It may be incomplete because it is an un-instantiated template, which + // should work because we can just instantiate it. + s = sizeof(CompleteTemplate); // This works! 
+ + // It may be incomplete because it is a lazy-initialized type from HLSL, + // which can be completed, and then will report a non-numeric type error. + // expected-error@+1{{invalid application of 'sizeof' to non-numeric type 'Buffer'}} + s = sizeof(Buffer); +} diff --git a/tools/clang/test/SemaHLSL/template-checks.hlsl b/tools/clang/test/SemaHLSL/template-checks.hlsl index d0d736fc1f..751e89b652 100644 --- a/tools/clang/test/SemaHLSL/template-checks.hlsl +++ b/tools/clang/test/SemaHLSL/template-checks.hlsl @@ -1,8 +1,8 @@ // RUN: %dxc -Tlib_6_3 -verify %s Texture2D t_float4; -Texture2D t_obj_sampler; /* expected-error {{'SamplerState' is an object and cannot be used as a type parameter}} fxc-error {{X3124: object element type cannot be an object type}} */ -Texture2D > t_obj_tex; /* expected-error {{'Texture2D' is an object and cannot be used as a type parameter}} fxc-error {{X3124: object element type cannot be an object type}} */ +Texture2D t_obj_sampler; /* expected-error {{object 'SamplerState' is not allowed in builtin template parameters}} fxc-error {{X3124: object element type cannot be an object type}} */ +Texture2D > t_obj_tex; /* expected-error {{object 'Texture2D' is not allowed in builtin template parameters}} fxc-error {{X3124: object element type cannot be an object type}} */ matrix m_obj_sampler; /* expected-error {{'SamplerState' cannot be used as a type parameter where a scalar is required}} fxc-error {{X3123: matrix element type must be a scalar type}} */ matrix m_bool; @@ -15,7 +15,7 @@ matrix m_bool; RWBuffer rwb_struct; /* expected-error {{elements of typed buffers and textures must fit in four 32-bit quantities}} fxc-error {{X3037: elements of typed buffers and textures must fit in four 32-bit quantities}} */ -RWBuffer rwb_struct_objs; /* expected-error {{'SamplerState' is an object and cannot be used as a type parameter}} */ +RWBuffer rwb_struct_objs; /* expected-error {{object 'SamplerState' is not allowed in builtin template parameters}} */ void vain() { // Nothing to do here. 
diff --git a/tools/clang/test/SemaHLSL/template-udt-load.hlsl b/tools/clang/test/SemaHLSL/template-udt-load.hlsl index 591f27b384..dd7cf8bd16 100644 --- a/tools/clang/test/SemaHLSL/template-udt-load.hlsl +++ b/tools/clang/test/SemaHLSL/template-udt-load.hlsl @@ -4,10 +4,51 @@ ByteAddressBuffer In; RWBuffer Out; +template +struct Foo { + // expected-note@+1{{'RWBuffer' field declared here}} + T Member; +}; + +template +struct MyTemplate { + T GetValue(ByteAddressBuffer srv, uint offset) { + // expected-error@+2{{Explicit template arguments on intrinsic Load must be a single numeric type}} + // expected-error@+1{{object 'RWBuffer' is not allowed in builtin template parameters}} + return srv.Load(offset); + } +}; +template +T GetValue(uint offset) { + MyTemplate myTemplate; + // expected-error@+2{{scalar, vector, or matrix expected}} + // expected-note@+1{{in instantiation of member function 'MyTemplate >::GetValue' requested here}} + return myTemplate.GetValue(In, offset) + + // expected-error@+2{{Explicit template arguments on intrinsic Load must be a single numeric type}} + // expected-error@+1{{object 'RWBuffer' is not allowed in builtin template parameters}} + In.Load >(offset + 4).Member; +} + +// expected-note@+1{{forward declaration of 'Incomplete'}} +struct Incomplete; + [shader("compute")] [numthreads(1,1,1)] void main() { - RWBuffer FB = In.Load >(0); // expected-error {{Explicit template arguments on intrinsic Load must be a single numeric type}} + RWBuffer FB = In.Load >(0); + // expected-error@-1{{Explicit template arguments on intrinsic Load must be a single numeric type}} + // expected-error@-2{{object 'RWBuffer' is not allowed in builtin template parameters}} + Out[0] = FB[0]; + + // Ok: + Out[4] = GetValue(4); + + // expected-note@?{{'Load' declared here}} + // expected-error@+1{{calling 'Load' with incomplete return type 'Incomplete'}} + Out[8] = In.Load(8); + + // expected-note@+1 2 {{in instantiation of function template specialization 'GetValue >' requested here}} + RWBuffer FB2 = GetValue >(16); } diff --git a/tools/clang/test/SemaHLSL/using-namespace-dx-errors.hlsl b/tools/clang/test/SemaHLSL/using-namespace-dx-errors.hlsl new file mode 100644 index 0000000000..233ce103ce --- /dev/null +++ b/tools/clang/test/SemaHLSL/using-namespace-dx-errors.hlsl @@ -0,0 +1,42 @@ +// RUN: %dxc -T lib_6_9 %s -verify + +RaytracingAccelerationStructure Scene : register(t0, space0); + +struct[raypayload] RayPayload { + float4 color : write(caller) : read(closesthit); +}; + +[shader("raygeneration")] void MyRaygenShader() { + // Set the ray's extents. 
+ RayDesc ray; + ray.Origin = float3(0, 0, 1); + ray.Direction = float3(1, 0, 0); + ray.TMin = 0.001; + ray.TMax = 10000.0; + + RayPayload payload = {float4(0, 0, 0, 0)}; + + { + using namespace dx; + HitObject hit = + HitObject::TraceRay(Scene, RAY_FLAG_NONE, ~0, 0, 1, 0, + ray, payload); + + int sortKey = 1; + MaybeReorderThread(sortKey, 1); + } + + { + int sortKey = 1; + MaybeReorderThread(sortKey, 1); // expected-error{{use of undeclared identifier 'MaybeReorderThread'; did you mean 'MaybeReorderThread'?}} + } + + int sortKey = 1; + MaybeReorderThread(sortKey, 1); // expected-error{{use of undeclared identifier 'MaybeReorderThread'; did you mean 'MaybeReorderThread'?}} + + HitObject hit = // expected-error{{unknown type name 'HitObject'}} + HitObject::TraceRay(Scene, RAY_FLAG_NONE, ~0, 0, 1, 0, + ray, payload); + + HitObject::Invoke(hit, payload); // expected-error{{use of undeclared identifier 'HitObject'}} +} diff --git a/tools/clang/test/SemaHLSL/using-namespace-dx.hlsl b/tools/clang/test/SemaHLSL/using-namespace-dx.hlsl new file mode 100644 index 0000000000..093e86b2fa --- /dev/null +++ b/tools/clang/test/SemaHLSL/using-namespace-dx.hlsl @@ -0,0 +1,56 @@ +// RUN: %dxc -T lib_6_9 -ast-dump-implicit %s | FileCheck %s + +RaytracingAccelerationStructure Scene : register(t0, space0); + +struct[raypayload] RayPayload { + float4 color : write(caller) : read(closesthit); +}; + +namespace MyStuff { + using namespace dx; + void MaybeReorderThread(int2 V); +} + +void MyStuff::MaybeReorderThread(int2 V) { + MaybeReorderThread(V.x, V.y); +} + +[shader("raygeneration")] void MyRaygenShader() { + // Set the ray's extents. + RayDesc ray; + ray.Origin = float3(0, 0, 1); + ray.Direction = float3(1, 0, 0); + ray.TMin = 0.001; + ray.TMax = 10000.0; + + RayPayload payload = {float4(0, 0, 0, 0)}; + + using namespace dx; + HitObject hit = + HitObject::TraceRay(Scene, RAY_FLAG_NONE, ~0, 0, 1, 0, + ray, payload); + + int sortKey = 1; + MaybeReorderThread(sortKey, 1); + + HitObject::Invoke(hit, payload); + + MyStuff::MaybeReorderThread(int2(sortKey, 1)); +} + +// Find the DeclRefExpr for the call to MaybeReorderThread: + +// CHECK: FunctionDecl [[MyDeclAddr:0x[0-9a-fA-F]+]] parent {{.*}} used MaybeReorderThread 'void (int2)' +// CHECK: DeclRefExpr {{.*}} 'void (unsigned int, unsigned int)' lvalue Function [[DeclAddr:0x[0-9a-fA-F]+]] 'MaybeReorderThread' 'void (unsigned int, unsigned int)' + +// CHECK: FunctionDecl [[DeclAddr]] <> implicit used MaybeReorderThread 'void (unsigned int, unsigned int)' extern +// CHECK-NEXT: ParmVarDecl {{.*}} CoherenceHint 'unsigned int' +// CHECK-NEXT: ParmVarDecl {{.*}} NumCoherenceHintBitsFromLSB 'unsigned int' +// CHECK-NEXT: HLSLIntrinsicAttr {{.*}} Implicit "op" "" 359 +// CHECK-NEXT: AvailabilityAttr {{.*}} Implicit 6.9 0 0 "" + +// CHECK-LABEL: MyRaygenShader + +// CHECK: DeclRefExpr {{.*}} 'void (unsigned int, unsigned int)' lvalue Function [[DeclAddr:0x[0-9a-fA-F]+]] 'MaybeReorderThread' 'void (unsigned int, unsigned int)' +// CHECK: DeclRefExpr {{.*}} 'void (int2)' lvalue Function [[MyDeclAddr:0x[0-9a-fA-F]+]] 'MaybeReorderThread' 'void (int2)' + diff --git a/tools/clang/tools/dxcompiler/CMakeLists.txt b/tools/clang/tools/dxcompiler/CMakeLists.txt index c69e276194..26bf0e5d98 100644 --- a/tools/clang/tools/dxcompiler/CMakeLists.txt +++ b/tools/clang/tools/dxcompiler/CMakeLists.txt @@ -57,7 +57,6 @@ set(SOURCES DXCompiler.rc DXCompiler.def dxcfilesystem.cpp - dxillib.cpp dxcutil.cpp dxcdisassembler.cpp dxcpdbutils.cpp @@ -75,7 +74,6 @@ set(SOURCES dxcutil.cpp 
dxcdisassembler.cpp dxcpdbutils.cpp - dxillib.cpp dxcvalidator.cpp dxclinker.cpp dxcshadersourceinfo.cpp diff --git a/tools/clang/tools/dxcompiler/DXCompiler.cpp b/tools/clang/tools/dxcompiler/DXCompiler.cpp index c548441449..c7ffcbffa1 100644 --- a/tools/clang/tools/dxcompiler/DXCompiler.cpp +++ b/tools/clang/tools/dxcompiler/DXCompiler.cpp @@ -19,7 +19,6 @@ #ifdef LLVM_ON_WIN32 #include "dxcetw.h" #endif -#include "dxillib.h" namespace hlsl { HRESULT SetupRegistryPassForHLSL(); @@ -65,7 +64,6 @@ static HRESULT InitMaybeFail() throw() { fsSetup = true; IFC(hlsl::SetupRegistryPassForHLSL()); IFC(hlsl::SetupRegistryPassForPIX()); - IFC(DxilLibInitialize()); if (hlsl::options::initHlslOptTable()) { hr = E_FAIL; goto Cleanup; @@ -110,12 +108,6 @@ BOOL WINAPI DllMain(HINSTANCE hinstDLL, DWORD Reason, LPVOID reserved) { ::hlsl::options::cleanupHlslOptTable(); ::llvm::sys::fs::CleanupPerThreadFileSystem(); ::llvm::llvm_shutdown(); - if (reserved == - NULL) { // FreeLibrary has been called or the DLL load failed - DxilLibCleanup(DxilLibCleanUpType::UnloadLibrary); - } else { // Process termination. We should not call FreeLibrary() - DxilLibCleanup(DxilLibCleanUpType::ProcessTermination); - } DxcClearThreadMalloc(); DxcCleanupThreadMalloc(); DxcEtw_DXCompilerShutdown_Stop(S_OK); diff --git a/tools/clang/tools/dxcompiler/dxcapi.cpp b/tools/clang/tools/dxcompiler/dxcapi.cpp index a6a877cba4..d4e85bc35c 100644 --- a/tools/clang/tools/dxcompiler/dxcapi.cpp +++ b/tools/clang/tools/dxcompiler/dxcapi.cpp @@ -25,7 +25,6 @@ #include "dxcetw.h" #endif #include "dxc/DxilContainer/DxcContainerBuilder.h" -#include "dxillib.h" #include HRESULT CreateDxcCompiler(REFIID riid, _Out_ LPVOID *ppv); @@ -59,20 +58,11 @@ HRESULT CreateDxcContainerReflection(REFIID riid, _Out_ LPVOID *ppv) { HRESULT CreateDxcContainerBuilder(REFIID riid, _Out_ LPVOID *ppv) { // Call dxil.dll's containerbuilder *ppv = nullptr; - const char *warning; - HRESULT hr = DxilLibCreateInstance(CLSID_DxcContainerBuilder, - (IDxcContainerBuilder **)ppv); - if (FAILED(hr)) { - warning = "Unable to create container builder from dxil.dll. 
Resulting " - "container will not be signed.\n"; - } else { - return hr; - } CComPtr Result = DxcContainerBuilder::Alloc(DxcGetThreadMallocNoRef()); IFROOM(Result.p); - Result->Init(warning); + Result->Init(); return Result->QueryInterface(riid, ppv); } @@ -87,11 +77,7 @@ static HRESULT ThreadMallocDxcCreateInstance(REFCLSID rclsid, REFIID riid, } else if (IsEqualCLSID(rclsid, CLSID_DxcUtils)) { hr = CreateDxcUtils(riid, ppv); } else if (IsEqualCLSID(rclsid, CLSID_DxcValidator)) { - if (DxilLibIsEnabled()) { - hr = DxilLibCreateInstance(rclsid, riid, (IUnknown **)ppv); - } else { - hr = CreateDxcValidator(riid, ppv); - } + hr = CreateDxcValidator(riid, ppv); } else if (IsEqualCLSID(rclsid, CLSID_DxcAssembler)) { hr = CreateDxcAssembler(riid, ppv); } else if (IsEqualCLSID(rclsid, CLSID_DxcOptimizer)) { diff --git a/tools/clang/tools/dxcompiler/dxcassembler.cpp b/tools/clang/tools/dxcompiler/dxcassembler.cpp index 0ff2abe26c..6622e93cbc 100644 --- a/tools/clang/tools/dxcompiler/dxcassembler.cpp +++ b/tools/clang/tools/dxcompiler/dxcassembler.cpp @@ -19,7 +19,6 @@ #include "dxc/Support/dxcfilesystem.h" #include "dxc/Support/microcom.h" #include "dxcutil.h" -#include "dxillib.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/LLVMContext.h" diff --git a/tools/clang/tools/dxcompiler/dxclinker.cpp b/tools/clang/tools/dxcompiler/dxclinker.cpp index 82c9b8e96b..f5427ccc08 100644 --- a/tools/clang/tools/dxcompiler/dxclinker.cpp +++ b/tools/clang/tools/dxcompiler/dxclinker.cpp @@ -18,7 +18,6 @@ #include "dxc/Support/dxcapi.impl.h" #include "dxc/Support/microcom.h" #include "dxc/dxcapi.h" -#include "dxillib.h" #include "llvm/ADT/SmallVector.h" #include diff --git a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp index ebeee380ef..84b568df9c 100644 --- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp +++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp @@ -56,7 +56,6 @@ #include "dxcompileradapter.h" #include "dxcshadersourceinfo.h" #include "dxcversion.inc" -#include "dxillib.h" #include #include @@ -850,11 +849,9 @@ class DxcCompiler : public IDxcCompiler3, compiler.getCodeGenOpts().HLSLValidatorMajorVer = opts.ValVerMajor; compiler.getCodeGenOpts().HLSLValidatorMinorVer = opts.ValVerMinor; } else { - // Version from dxil.dll, or internal validator if unavailable dxcutil::GetValidatorVersion( &compiler.getCodeGenOpts().HLSLValidatorMajorVer, - &compiler.getCodeGenOpts().HLSLValidatorMinorVer, - opts.SelectValidator); + &compiler.getCodeGenOpts().HLSLValidatorMinorVer); } // Root signature-only container validation is only supported on 1.5 and @@ -934,7 +931,7 @@ class DxcCompiler : public IDxcCompiler3, CComPtr pValErrors; // Validation failure communicated through diagnostic error dxcutil::ValidateRootSignatureInContainer( - pOutputBlob, &compiler.getDiagnostics(), opts.SelectValidator); + pOutputBlob, &compiler.getDiagnostics()); } } } else if (opts.VerifyDiagnostics) { @@ -1054,8 +1051,7 @@ class DxcCompiler : public IDxcCompiler3, std::move(serializeModule), pOutputBlob, m_pMalloc, SerializeFlags, pOutputStream, 0, opts.GetPDBName(), &compiler.getDiagnostics(), &ShaderHashContent, pReflectionStream, - pRootSigStream, pRootSignatureBlob, pPrivateBlob, - opts.SelectValidator); + pRootSigStream, pRootSignatureBlob, pPrivateBlob); inputs.pVersionInfo = static_cast(this); @@ -1108,8 +1104,7 @@ class DxcCompiler : public IDxcCompiler3, CComPtr pValErrors; // Validation failure communicated through diagnostic error 
dxcutil::ValidateRootSignatureInContainer( - pRootSignature, &compiler.getDiagnostics(), - opts.SelectValidator); + pRootSignature, &compiler.getDiagnostics()); } IFT(pResult->SetOutputObject(DXC_OUT_ROOT_SIGNATURE, pRootSignature)); @@ -1324,13 +1319,6 @@ class DxcCompiler : public IDxcCompiler3, CComPtr pResult; hr = e.hr; std::string msg("Internal Compiler error: "); - switch (hr) { - case DXC_E_VALIDATOR_MISSING: - msg = "Error: external validator selected, but DXIL.dll not found."; - break; - default: - break; - } msg += e.msg; if (SUCCEEDED(DxcResult::Create( e.hr, DXC_OUT_NONE, diff --git a/tools/clang/tools/dxcompiler/dxcutil.cpp b/tools/clang/tools/dxcompiler/dxcutil.cpp index d3a531d4c6..4e5c5c95e8 100644 --- a/tools/clang/tools/dxcompiler/dxcutil.cpp +++ b/tools/clang/tools/dxcompiler/dxcutil.cpp @@ -19,7 +19,6 @@ #include "dxc/Support/WinIncludes.h" #include "dxc/Support/dxcapi.impl.h" #include "dxc/dxcapi.h" -#include "dxillib.h" #include "clang/Basic/Diagnostic.h" #include "llvm/Bitcode/ReaderWriter.h" #include "llvm/IR/DebugInfo.h" @@ -49,23 +48,9 @@ HRESULT RunInternalValidator(IDxcValidator *pValidator, namespace { // AssembleToContainer helper functions. -bool CreateValidator(CComPtr &pValidator, - hlsl::options::ValidatorSelection SelectValidator = - hlsl::options::ValidatorSelection::Auto) { - bool bInternal = - SelectValidator == hlsl::options::ValidatorSelection::Internal; - bool bExternal = - SelectValidator == hlsl::options::ValidatorSelection::External; - if (!bInternal && DxilLibIsEnabled()) - DxilLibCreateInstance(CLSID_DxcValidator, &pValidator); - - bool bInternalValidator = false; - if (pValidator == nullptr) { - IFTBOOL(!bExternal, DXC_E_VALIDATOR_MISSING); - IFT(CreateDxcValidator(IID_PPV_ARGS(&pValidator))); - bInternalValidator = true; - } - return bInternalValidator; +// Create an instance of the internal validator. +void CreateValidator(CComPtr &pValidator) { + IFT(CreateDxcValidator(IID_PPV_ARGS(&pValidator))); } } // namespace @@ -79,23 +64,20 @@ AssembleInputs::AssembleInputs( uint32_t ValidationFlags, llvm::StringRef DebugName, clang::DiagnosticsEngine *pDiag, hlsl::DxilShaderHash *pShaderHashOut, AbstractMemoryStream *pReflectionOut, AbstractMemoryStream *pRootSigOut, - CComPtr pRootSigBlob, CComPtr pPrivateBlob, - hlsl::options::ValidatorSelection SelectValidator) + CComPtr pRootSigBlob, CComPtr pPrivateBlob) : pM(std::move(pM)), pOutputContainerBlob(pOutputContainerBlob), pMalloc(pMalloc), SerializeFlags(SerializeFlags), ValidationFlags(ValidationFlags), pModuleBitcode(pModuleBitcode), DebugName(DebugName), pDiag(pDiag), pShaderHashOut(pShaderHashOut), pReflectionOut(pReflectionOut), pRootSigOut(pRootSigOut), - pRootSigBlob(pRootSigBlob), pPrivateBlob(pPrivateBlob), - SelectValidator(SelectValidator) {} + pRootSigBlob(pRootSigBlob), pPrivateBlob(pPrivateBlob) {} -void GetValidatorVersion(unsigned *pMajor, unsigned *pMinor, - hlsl::options::ValidatorSelection SelectValidator) { +void GetValidatorVersion(unsigned *pMajor, unsigned *pMinor) { if (pMajor == nullptr || pMinor == nullptr) return; CComPtr pValidator; - CreateValidator(pValidator, SelectValidator); + CreateValidator(pValidator); CComPtr pVersionInfo; if (SUCCEEDED(pValidator.QueryInterface(&pVersionInfo))) { @@ -167,76 +149,19 @@ HRESULT ValidateAndAssembleToContainer(AssembleInputs &inputs) { std::unique_ptr llvmModuleWithDebugInfo; CComPtr pValidator; - bool bInternalValidator = CreateValidator(pValidator, inputs.SelectValidator); - // Warning on internal Validator - -
CComPtr pValidator2; - if (!bInternalValidator) { - pValidator.QueryInterface(&pValidator2); - } + CreateValidator(pValidator); - if (bInternalValidator || pValidator2) { - // If using the internal validator or external validator supports - // IDxcValidator2, we'll use the modules directly. In this case, we'll want - // to make a clone to avoid SerializeDxilContainerForModule stripping all - // the debug info. The debug info will be stripped from the orginal module, - // but preserved in the cloned module. - if (llvm::getDebugMetadataVersionFromModule(*inputs.pM) != 0) { - llvmModuleWithDebugInfo.reset(llvm::CloneModule(inputs.pM.get())); - } - } - - // Verify validator version can validate this module - CComPtr pValidatorVersion; - IFT(pValidator->QueryInterface(&pValidatorVersion)); - UINT32 ValMajor, ValMinor; - IFT(pValidatorVersion->GetVersion(&ValMajor, &ValMinor)); - DxilModule &DM = inputs.pM.get()->GetOrCreateDxilModule(); - unsigned ReqValMajor, ReqValMinor; - DM.GetValidatorVersion(ReqValMajor, ReqValMinor); - if (DXIL::CompareVersions(ValMajor, ValMinor, ReqValMajor, ReqValMinor) < 0) { - // Module is expecting to be validated by a newer validator. - if (inputs.pDiag) { - unsigned diagID = inputs.pDiag->getCustomDiagID( - clang::DiagnosticsEngine::Level::Error, - "The module cannot be validated by the version of the validator " - "currently attached."); - inputs.pDiag->Report(diagID); - } - return E_FAIL; - } + if (llvm::getDebugMetadataVersionFromModule(*inputs.pM) != 0) + llvmModuleWithDebugInfo.reset(llvm::CloneModule(inputs.pM.get())); AssembleToContainer(inputs); CComPtr pValResult; - // Important: in-place edit is required so the blob is reused and thus - // dxil.dll can be released. + // In-place edit to avoid an extra copy inputs.ValidationFlags |= DxcValidatorFlags_InPlaceEdit; - if (bInternalValidator) { - IFT(RunInternalValidator(pValidator, llvmModuleWithDebugInfo.get(), - inputs.pOutputContainerBlob, - inputs.ValidationFlags, &pValResult)); - } else { - if (pValidator2 && llvmModuleWithDebugInfo) { - // If metadata was stripped, re-serialize the input module. 
- CComPtr pDebugModuleStream; - IFT(CreateMemoryStream(DxcGetThreadMallocNoRef(), &pDebugModuleStream)); - raw_stream_ostream outStream(pDebugModuleStream.p); - WriteBitcodeToFile(llvmModuleWithDebugInfo.get(), outStream, true); - outStream.flush(); - - DxcBuffer debugModule = {}; - debugModule.Ptr = pDebugModuleStream->GetPtr(); - debugModule.Size = pDebugModuleStream->GetPtrSize(); - - IFT(pValidator2->ValidateWithDebug(inputs.pOutputContainerBlob, - inputs.ValidationFlags, &debugModule, - &pValResult)); - } else { - IFT(pValidator->Validate(inputs.pOutputContainerBlob, - inputs.ValidationFlags, &pValResult)); - } - } + IFT(RunInternalValidator(pValidator, llvmModuleWithDebugInfo.get(), + inputs.pOutputContainerBlob, inputs.ValidationFlags, + &pValResult)); IFT(pValResult->GetStatus(&valHR)); if (inputs.pDiag) { if (FAILED(valHR)) { @@ -261,9 +186,8 @@ HRESULT ValidateAndAssembleToContainer(AssembleInputs &inputs) { return valHR; } -HRESULT ValidateRootSignatureInContainer( - IDxcBlob *pRootSigContainer, clang::DiagnosticsEngine *pDiag, - hlsl::options::ValidatorSelection SelectValidator) { +HRESULT ValidateRootSignatureInContainer(IDxcBlob *pRootSigContainer, + clang::DiagnosticsEngine *pDiag) { HRESULT valHR = S_OK; CComPtr pValidator; CComPtr pValResult; diff --git a/tools/clang/tools/dxcompiler/dxcutil.h b/tools/clang/tools/dxcompiler/dxcutil.h index 45b3d4dc1a..8612353561 100644 --- a/tools/clang/tools/dxcompiler/dxcutil.h +++ b/tools/clang/tools/dxcompiler/dxcutil.h @@ -54,9 +54,7 @@ struct AssembleInputs { hlsl::AbstractMemoryStream *pReflectionOut = nullptr, hlsl::AbstractMemoryStream *pRootSigOut = nullptr, CComPtr pRootSigBlob = nullptr, - CComPtr pPrivateBlob = nullptr, - hlsl::options::ValidatorSelection SelectValidator = - hlsl::options::ValidatorSelection::Auto); + CComPtr pPrivateBlob = nullptr); std::unique_ptr pM; CComPtr &pOutputContainerBlob; IDxcVersionInfo *pVersionInfo = nullptr; @@ -71,18 +69,13 @@ struct AssembleInputs { hlsl::AbstractMemoryStream *pRootSigOut = nullptr; CComPtr pRootSigBlob = nullptr; CComPtr pPrivateBlob = nullptr; - hlsl::options::ValidatorSelection SelectValidator = - hlsl::options::ValidatorSelection::Auto; }; HRESULT ValidateAndAssembleToContainer(AssembleInputs &inputs); -HRESULT ValidateRootSignatureInContainer( - IDxcBlob *pRootSigContainer, clang::DiagnosticsEngine *pDiag = nullptr, - hlsl::options::ValidatorSelection SelectValidator = - hlsl::options::ValidatorSelection::Auto); +HRESULT +ValidateRootSignatureInContainer(IDxcBlob *pRootSigContainer, + clang::DiagnosticsEngine *pDiag = nullptr); HRESULT SetRootSignature(hlsl::DxilModule *pModule, CComPtr pSource); -void GetValidatorVersion(unsigned *pMajor, unsigned *pMinor, - hlsl::options::ValidatorSelection SelectValidator = - hlsl::options::ValidatorSelection::Auto); +void GetValidatorVersion(unsigned *pMajor, unsigned *pMinor); void AssembleToContainer(AssembleInputs &inputs); HRESULT Disassemble(IDxcBlob *pProgram, llvm::raw_string_ostream &Stream); void ReadOptsAndValidate(hlsl::options::MainArgs &mainArgs, diff --git a/tools/clang/tools/dxcompiler/dxillib.cpp b/tools/clang/tools/dxcompiler/dxillib.cpp deleted file mode 100644 index 72abc869da..0000000000 --- a/tools/clang/tools/dxcompiler/dxillib.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// // -// dxillib.cpp // -// Copyright (C) Microsoft Corporation. All rights reserved. 
// -// This file is distributed under the University of Illinois Open Source // -// License. See LICENSE.TXT for details. // -// // -// Provides access to dxil.dll // -// // -/////////////////////////////////////////////////////////////////////////////// - -#include "dxillib.h" -#include "dxc/Support/Global.h" // For DXASSERT -#include "dxc/Support/dxcapi.use.h" -#include "llvm/Support/Mutex.h" - -using namespace dxc; - -static DxcDllSupport g_DllSupport; -static HRESULT g_DllLibResult = S_OK; - -static llvm::sys::Mutex *cs = nullptr; - -// Check if we can successfully get IDxcValidator from dxil.dll -// This function is to prevent multiple attempts to load dxil.dll -HRESULT DxilLibInitialize() { - cs = new llvm::sys::Mutex; - cs->lock(); - g_DllLibResult = g_DllSupport.InitializeForDll(kDxilLib, "DxcCreateInstance"); - cs->unlock(); - return S_OK; -} - -HRESULT DxilLibCleanup(DxilLibCleanUpType type) { - HRESULT hr = S_OK; - if (type == DxilLibCleanUpType::ProcessTermination) { - g_DllSupport.Detach(); - } else if (type == DxilLibCleanUpType::UnloadLibrary) { - g_DllSupport.Cleanup(); - } else { - hr = E_INVALIDARG; - } - delete cs; - cs = nullptr; - return hr; -} - -// g_DllLibResult is S_OK by default, check again to see if dxil.dll is loaded -// If we fail to load dxil.dll, set g_DllLibResult to E_FAIL so that we don't -// have multiple attempts to load dxil.dll -bool DxilLibIsEnabled() { - cs->lock(); - if (SUCCEEDED(g_DllLibResult)) { - if (!g_DllSupport.IsEnabled()) { - g_DllLibResult = - g_DllSupport.InitializeForDll(kDxilLib, "DxcCreateInstance"); - } - } - cs->unlock(); - return SUCCEEDED(g_DllLibResult); -} - -HRESULT DxilLibCreateInstance(REFCLSID rclsid, REFIID riid, - IUnknown **ppInterface) { - DXASSERT_NOMSG(ppInterface != nullptr); - HRESULT hr = E_FAIL; - if (DxilLibIsEnabled()) { - cs->lock(); - hr = g_DllSupport.CreateInstance(rclsid, riid, ppInterface); - cs->unlock(); - } - return hr; -} diff --git a/tools/clang/tools/dxcompiler/dxillib.h b/tools/clang/tools/dxcompiler/dxillib.h deleted file mode 100644 index 879d023459..0000000000 --- a/tools/clang/tools/dxcompiler/dxillib.h +++ /dev/null @@ -1,42 +0,0 @@ -/////////////////////////////////////////////////////////////////////////////// -// // -// dxillib.h // -// Copyright (C) Microsoft Corporation. All rights reserved. // -// This file is distributed under the University of Illinois Open Source // -// License. See LICENSE.TXT for details. // -// // -// Provides wrappers to handle calls to dxil.dll // -// // -/////////////////////////////////////////////////////////////////////////////// - -#pragma once -#ifndef __DXC_DXILLIB__ -#define __DXC_DXILLIB__ - -#include "dxc/Support/WinIncludes.h" -#include "dxc/WinAdapter.h" - -// Initialize Dxil library. -HRESULT DxilLibInitialize(); - -// When dxcompiler is detached from process, -// we should not call FreeLibrary on process termination. 
-// So the caller has to specify if cleaning is from FreeLibrary or process -// termination -enum class DxilLibCleanUpType { UnloadLibrary, ProcessTermination }; - -HRESULT DxilLibCleanup(DxilLibCleanUpType type); - -// Check if can access dxil.dll -bool DxilLibIsEnabled(); - -HRESULT DxilLibCreateInstance(REFCLSID rclsid, REFIID riid, - IUnknown **ppInterface); - -template -HRESULT DxilLibCreateInstance(REFCLSID rclsid, TInterface **ppInterface) { - return DxilLibCreateInstance(rclsid, __uuidof(TInterface), - (IUnknown **)ppInterface); -} - -#endif // __DXC_DXILLIB__ diff --git a/tools/clang/unittests/HLSL/DxilContainerTest.cpp b/tools/clang/unittests/HLSL/DxilContainerTest.cpp index 339b33c655..34b4d338fe 100644 --- a/tools/clang/unittests/HLSL/DxilContainerTest.cpp +++ b/tools/clang/unittests/HLSL/DxilContainerTest.cpp @@ -103,6 +103,7 @@ class DxilContainerTest : public ::testing::Test { TEST_METHOD(CompileCSWaveSizeRange_CheckPSV0) TEST_METHOD(CompileWhenOkThenCheckRDAT) TEST_METHOD(CompileWhenOkThenCheckRDAT2) + TEST_METHOD(CompileWhenOkThenCheckRDATSM69) TEST_METHOD(CompileWhenOkThenCheckReflection1) TEST_METHOD(DxcUtils_CreateReflection) TEST_METHOD(CheckReflectionQueryInterface) @@ -1444,6 +1445,146 @@ TEST_F(DxilContainerTest, CompileCSWaveSizeRange_CheckPSV0) { TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { if (m_ver.SkipDxilVersion(1, 3)) return; + const char *shader = + "float c_buf;" + "RWTexture1D tex : register(u5);" + "Texture1D tex2 : register(t0);" + "RWByteAddressBuffer b_buf;" + "struct Foo { float2 f2; int2 i2; };" + "AppendStructuredBuffer append_buf;" + "ConsumeStructuredBuffer consume_buf;" + "RasterizerOrderedByteAddressBuffer rov_buf;" + "globallycoherent RWByteAddressBuffer gc_buf;" + "float function_import(float x);" + "export float function0(min16float x) { " + " return x + 1 + tex[0].x; }" + "export float function1(float x, min12int i) {" + " return x + c_buf + b_buf.Load(x) + tex2[i].x; }" + "export float function2(float x) { return x + function_import(x); }" + "export void function3(int i) {" + " Foo f = consume_buf.Consume();" + " f.f2 += 0.5; append_buf.Append(f);" + " rov_buf.Store(i, f.i2.x);" + " gc_buf.Store(i, f.i2.y);" + " b_buf.Store(i, f.i2.x + f.i2.y); }"; + CComPtr pCompiler; + CComPtr pSource; + CComPtr pProgram; + CComPtr pDisassembly; + CComPtr pResult; + + struct CheckResFlagInfo { + std::string name; + hlsl::DXIL::ResourceKind kind; + hlsl::RDAT::DxilResourceFlag flag; + }; + const unsigned numResFlagCheck = 5; + CheckResFlagInfo resFlags[numResFlagCheck] = { + {"b_buf", hlsl::DXIL::ResourceKind::RawBuffer, + hlsl::RDAT::DxilResourceFlag::None}, + {"append_buf", hlsl::DXIL::ResourceKind::StructuredBuffer, + hlsl::RDAT::DxilResourceFlag::UAVCounter}, + {"consume_buf", hlsl::DXIL::ResourceKind::StructuredBuffer, + hlsl::RDAT::DxilResourceFlag::UAVCounter}, + {"gc_buf", hlsl::DXIL::ResourceKind::RawBuffer, + hlsl::RDAT::DxilResourceFlag::UAVGloballyCoherent}, + {"rov_buf", hlsl::DXIL::ResourceKind::RawBuffer, + hlsl::RDAT::DxilResourceFlag::UAVRasterizerOrderedView}}; + + VERIFY_SUCCEEDED(CreateCompiler(&pCompiler)); + CreateBlobFromText(shader, &pSource); + VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"hlsl.hlsl", L"main", + L"lib_6_3", nullptr, 0, nullptr, 0, + nullptr, &pResult)); + HRESULT hrStatus; + VERIFY_SUCCEEDED(pResult->GetStatus(&hrStatus)); + VERIFY_SUCCEEDED(hrStatus); + VERIFY_SUCCEEDED(pResult->GetResult(&pProgram)); + CComPtr containerReflection; + uint32_t partCount; + 
IFT(m_dllSupport.CreateInstance(CLSID_DxcContainerReflection, + &containerReflection)); + IFT(containerReflection->Load(pProgram)); + IFT(containerReflection->GetPartCount(&partCount)); + bool blobFound = false; + for (uint32_t i = 0; i < partCount; ++i) { + uint32_t kind; + IFT(containerReflection->GetPartKind(i, &kind)); + if (kind == (uint32_t)hlsl::DxilFourCC::DFCC_RuntimeData) { + blobFound = true; + using namespace hlsl::RDAT; + CComPtr pBlob; + IFT(containerReflection->GetPartContent(i, &pBlob)); + // Validate using DxilRuntimeData + DxilRuntimeData context; + context.InitFromRDAT((char *)pBlob->GetBufferPointer(), + pBlob->GetBufferSize()); + auto funcTable = context.GetFunctionTable(); + auto resTable = context.GetResourceTable(); + VERIFY_ARE_EQUAL(funcTable.Count(), 4U); + std::string str("function"); + for (uint32_t j = 0; j < funcTable.Count(); ++j) { + auto funcReader = funcTable[j]; + std::string funcName(funcReader.getUnmangledName()); + VERIFY_IS_TRUE(str.compare(funcName.substr(0, 8)) == 0); + std::string cur_str = str; + cur_str.push_back('0' + j); + if (cur_str.compare("function0") == 0) { + VERIFY_ARE_EQUAL(funcReader.getResources().Count(), 1U); + hlsl::ShaderFlags flag; + flag.SetUAVLoadAdditionalFormats(true); + flag.SetLowPrecisionPresent(true); + uint64_t rawFlag = flag.GetFeatureInfo(); + VERIFY_ARE_EQUAL(funcReader.GetFeatureFlags(), rawFlag); + auto resReader = funcReader.getResources()[0]; + VERIFY_ARE_EQUAL(resReader.getClass(), + hlsl::DXIL::ResourceClass::UAV); + VERIFY_ARE_EQUAL(resReader.getKind(), + hlsl::DXIL::ResourceKind::Texture1D); + } else if (cur_str.compare("function1") == 0) { + hlsl::ShaderFlags flag; + flag.SetLowPrecisionPresent(true); + uint64_t rawFlag = flag.GetFeatureInfo(); + VERIFY_ARE_EQUAL(funcReader.GetFeatureFlags(), rawFlag); + VERIFY_ARE_EQUAL(funcReader.getResources().Count(), 3U); + } else if (cur_str.compare("function2") == 0) { + VERIFY_ARE_EQUAL(funcReader.GetFeatureFlags() & 0xffffffffffffffff, + 0U); + VERIFY_ARE_EQUAL(funcReader.getResources().Count(), 0U); + std::string dependency = funcReader.getFunctionDependencies()[0]; + VERIFY_IS_TRUE(dependency.find("function_import") != + std::string::npos); + } else if (cur_str.compare("function3") == 0) { + VERIFY_ARE_EQUAL(funcReader.GetFeatureFlags() & 0xffffffffffffffff, + 0U); + VERIFY_ARE_EQUAL(funcReader.getResources().Count(), numResFlagCheck); + for (unsigned i = 0; i < funcReader.getResources().Count(); ++i) { + auto resReader = funcReader.getResources()[0]; + VERIFY_ARE_EQUAL(resReader.getClass(), + hlsl::DXIL::ResourceClass::UAV); + unsigned j = 0; + for (; j < numResFlagCheck; ++j) { + if (resFlags[j].name.compare(resReader.getName()) == 0) + break; + } + VERIFY_IS_LESS_THAN(j, numResFlagCheck); + VERIFY_ARE_EQUAL(resReader.getKind(), resFlags[j].kind); + VERIFY_ARE_EQUAL(resReader.getFlags(), + static_cast(resFlags[j].flag)); + } + } else { + IFTBOOLMSG(false, E_FAIL, "unknown function name"); + } + } + VERIFY_ARE_EQUAL(resTable.Count(), 8U); + } + } + IFTBOOLMSG(blobFound, E_FAIL, "failed to find RDAT blob after compiling"); +} + +TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDATSM69) { + if (m_ver.SkipDxilVersion(1, 9)) + return; const char *shader = "float c_buf;" "RWTexture1D tex : register(u5);" @@ -1497,7 +1638,7 @@ TEST_F(DxilContainerTest, CompileWhenOkThenCheckRDAT) { VERIFY_SUCCEEDED(CreateCompiler(&pCompiler)); CreateBlobFromText(shader, &pSource); VERIFY_SUCCEEDED(pCompiler->Compile(pSource, L"hlsl.hlsl", L"main", - L"lib_6_3", nullptr, 0, nullptr, 0, 
+ L"lib_6_9", nullptr, 0, nullptr, 0, nullptr, &pResult)); HRESULT hrStatus; VERIFY_SUCCEEDED(pResult->GetStatus(&hrStatus)); diff --git a/tools/clang/unittests/HLSL/PixDiaTest.cpp b/tools/clang/unittests/HLSL/PixDiaTest.cpp index a4439b998d..d36e762762 100644 --- a/tools/clang/unittests/HLSL/PixDiaTest.cpp +++ b/tools/clang/unittests/HLSL/PixDiaTest.cpp @@ -13,6 +13,7 @@ #ifdef _WIN32 #include +#include #include "dxc/DxilContainer/DxilContainer.h" #include "dxc/Support/WinIncludes.h" @@ -186,6 +187,7 @@ class PixDiaTest { TEST_METHOD(DxcPixDxilDebugInfo_BitFields_Derived) TEST_METHOD(DxcPixDxilDebugInfo_BitFields_Bool) TEST_METHOD(DxcPixDxilDebugInfo_BitFields_Overlap) + TEST_METHOD(DxcPixDxilDebugInfo_BitFields_uint64) TEST_METHOD(DxcPixDxilDebugInfo_Min16SizesAndOffsets_Enabled) TEST_METHOD(DxcPixDxilDebugInfo_Min16SizesAndOffsets_Disabled) TEST_METHOD(DxcPixDxilDebugInfo_Min16VectorOffsets_Enabled) @@ -658,11 +660,11 @@ class PixDiaTest { const char *hlsl, const wchar_t *profile, const char *lineAtWhichToExamineVariables, std::vector const &ExpectedVariables); - void RunSizeAndOffsetTestCase(const char *hlsl, - std::array const &memberOffsets, - std::array const &memberSizes, - std::vector extraArgs = { - L"-Od"}); + CComPtr + RunSizeAndOffsetTestCase(const char *hlsl, + std::array const &memberOffsets, + std::array const &memberSizes, + std::vector extraArgs = {L"-Od"}); void RunVectorSizeAndOffsetTestCase(const char *hlsl, std::array const &memberOffsets, std::vector extraArgs = { @@ -2948,12 +2950,11 @@ void main() VERIFY_ARE_EQUAL(32u, secondFieldOffset); } -void PixDiaTest::RunSizeAndOffsetTestCase( - const char *hlsl, std::array const &memberOffsets, - std::array const &memberSizes, - std::vector extraArgs) { - if (m_ver.SkipDxilVersion(1, 5)) - return; +CComPtr +PixDiaTest::RunSizeAndOffsetTestCase(const char *hlsl, + std::array const &memberOffsets, + std::array const &memberSizes, + std::vector extraArgs) { auto debugInfo = CompileAndCreateDxcDebug(hlsl, L"cs_6_5", nullptr, extraArgs).debugInfo; auto live = GetLiveVariablesAt(hlsl, "STOP_HERE", debugInfo); @@ -2974,9 +2975,46 @@ void PixDiaTest::RunSizeAndOffsetTestCase( VERIFY_SUCCEEDED(field->GetFieldSizeInBits(&sizeInBits)); VERIFY_ARE_EQUAL(memberSizes[i], sizeInBits); } + // Check that first and second and third are reported as residing in the same + // register (cuz they do!), and that the third does not + + CComPtr bfStorage; + VERIFY_SUCCEEDED(bf->GetStorage(&bfStorage)); + return bfStorage; +} + +void RunBitfieldAdjacencyTest( + IDxcPixDxilStorage *bfStorage, + std::vector> const &adjacentRuns) { + std::vector> registersByRun; + registersByRun.resize(adjacentRuns.size()); + for (size_t run = 0; run < adjacentRuns.size(); ++run) { + for (auto const &field : adjacentRuns[run]) { + CComPtr fieldStorage; + VERIFY_SUCCEEDED(bfStorage->AccessField(field, &fieldStorage)); + DWORD reg; + VERIFY_SUCCEEDED(fieldStorage->GetRegisterNumber(®)); + registersByRun[run].insert(reg); + } + } + for (size_t run = 0; run < registersByRun.size(); ++run) { + { + // Every field in this run should have the same register number, so this + // set should be of size 1: + VERIFY_ARE_EQUAL(1, registersByRun[run].size()); + // Every adjacent run should have different register numbers: + if (run != 0) { + VERIFY_ARE_NOT_EQUAL(*registersByRun[run - 1].begin(), + *registersByRun[run].begin()); + } + } + } } TEST_F(PixDiaTest, DxcPixDxilDebugInfo_BitFields_Simple) { + if (m_ver.SkipDxilVersion(1, 5)) + return; + const char *hlsl = R"( struct Bitfields 
{ @@ -3000,10 +3038,16 @@ void main() } )"; - RunSizeAndOffsetTestCase(hlsl, {0, 17, 32, 64}, {17, 15, 3, 32}); + auto bfStorage = + RunSizeAndOffsetTestCase(hlsl, {0, 17, 32, 64}, {17, 15, 3, 32}); + RunBitfieldAdjacencyTest(bfStorage, + {{L"first", L"second"}, {L"third"}, {L"fourth"}}); } TEST_F(PixDiaTest, DxcPixDxilDebugInfo_BitFields_Derived) { + if (m_ver.SkipDxilVersion(1, 5)) + return; + const char *hlsl = R"( struct Bitfields { @@ -3027,10 +3071,16 @@ void main() } )"; - RunSizeAndOffsetTestCase(hlsl, {0, 17, 32, 64}, {17, 15, 3, 32}); + auto bfStorage = + RunSizeAndOffsetTestCase(hlsl, {0, 17, 32, 64}, {17, 15, 3, 32}); + RunBitfieldAdjacencyTest(bfStorage, + {{L"first", L"second"}, {L"third"}, {L"fourth"}}); } TEST_F(PixDiaTest, DxcPixDxilDebugInfo_BitFields_Bool) { + if (m_ver.SkipDxilVersion(1, 5)) + return; + const char *hlsl = R"( struct Bitfields { @@ -3054,17 +3104,58 @@ void main() } )"; - RunSizeAndOffsetTestCase(hlsl, {0, 1, 2, 32}, {1, 1, 3, 32}); + auto bfStorage = RunSizeAndOffsetTestCase(hlsl, {0, 1, 2, 32}, {1, 1, 3, 32}); + RunBitfieldAdjacencyTest(bfStorage, + {{L"first", L"second", L"third"}, {L"fourth"}}); } TEST_F(PixDiaTest, DxcPixDxilDebugInfo_BitFields_Overlap) { + if (m_ver.SkipDxilVersion(1, 5)) + return; + + const char *hlsl = R"( +struct Bitfields +{ + uint32_t first : 20; + uint32_t second : 20; // should end up in second DWORD + uint32_t third : 3; // should shader second DWORD + uint32_t fourth; // should be in third DWORD +}; + +RWStructuredBuffer UAV: register(u0); + +[numthreads(1, 1, 1)] +void main() +{ + Bitfields bf; + bf.first = UAV[0]; + bf.second = UAV[1]; + bf.third = UAV[2]; + bf.fourth = UAV[3]; + UAV[16] = bf.first + bf.second + bf.third + bf.fourth; //STOP_HERE +} + +)"; + auto bfStorage = + RunSizeAndOffsetTestCase(hlsl, {0, 32, 52, 64}, {20, 20, 3, 32}); + // (PIX #58022343): fields that overlap their storage type are not yet + // reflected properly in terms of their packed offsets as maintained via + // these PixDxc interfaces based on the dbg.declare data + // RunBitfieldAdjacencyTest(bfStorage, + // {{L"first"}, {L"second", L"third"}, {L"fourth"}}); +} + +TEST_F(PixDiaTest, DxcPixDxilDebugInfo_BitFields_uint64) { + if (m_ver.SkipDxilVersion(1, 5)) + return; + const char *hlsl = R"( struct Bitfields { - unsigned int first : 20; - unsigned int second : 20; // should end up in second DWORD - unsigned int third : 3; // should shader second DWORD - unsigned int fourth; // should be in third DWORD + uint64_t first : 20; + uint64_t second : 20; // should end up in first uint64 also + uint64_t third : 24; // in first + uint64_t fourth; // should be in second }; RWStructuredBuffer UAV: register(u0); @@ -3081,7 +3172,10 @@ void main() } )"; - RunSizeAndOffsetTestCase(hlsl, {0, 32, 52, 64}, {20, 20, 3, 32}); + auto bfStorage = + RunSizeAndOffsetTestCase(hlsl, {0, 20, 40, 64}, {20, 20, 24, 64}); + RunBitfieldAdjacencyTest(bfStorage, + {{L"first", L"second", L"third"}, {L"fourth"}}); } TEST_F(PixDiaTest, DxcPixDxilDebugInfo_Alignment_ConstInt) { @@ -3502,9 +3596,10 @@ void ClosestHitShader3(inout RayPayload payload, in BuiltInTriangleIntersectionA // Case: same function called from two places in same top-level function. // In this case, we expect the storage for the variable to be in the same - // place for both "instances" of the function: as a thread proceeds through - // the caller, it will write new values into the variable's storage during - // the second or subsequent invocations of the inlined function. 
+ // place for both "instances" of the function: as a thread proceeds + // through the caller, it will write new values into the variable's + // storage during the second or subsequent invocations of the inlined + // function. DWORD instructionOffset = AdvanceUntilFunctionEntered(dxilDebugger, 0, L"ClosestHitShader3"); instructionOffset = AdvanceUntilFunctionEntered( @@ -3550,9 +3645,10 @@ TEST_F(PixDiaTest, DxcPixDxilDebugInfo_VariableScopes_ForScopes) { // Case: same function called from two places in same top-level function. // In this case, we expect the storage for the variable to be in the same - // place for both "instances" of the function: as a thread proceeds through - // the caller, it will write new values into the variable's storage during - // the second or subsequent invocations of the inlined function. + // place for both "instances" of the function: as a thread proceeds + // through the caller, it will write new values into the variable's + // storage during the second or subsequent invocations of the inlined + // function. DWORD instructionOffset = AdvanceUntilFunctionEntered(dxilDebugger, 0, L"CSMain"); @@ -3597,9 +3693,10 @@ TEST_F(PixDiaTest, DxcPixDxilDebugInfo_VariableScopes_ScopeBraces) { // Case: same function called from two places in same top-level function. // In this case, we expect the storage for the variable to be in the same - // place for both "instances" of the function: as a thread proceeds through - // the caller, it will write new values into the variable's storage during - // the second or subsequent invocations of the inlined function. + // place for both "instances" of the function: as a thread proceeds + // through the caller, it will write new values into the variable's + // storage during the second or subsequent invocations of the inlined + // function. DWORD instructionOffset = AdvanceUntilFunctionEntered(dxilDebugger, 0, L"CSMain"); @@ -3644,9 +3741,10 @@ TEST_F(PixDiaTest, DxcPixDxilDebugInfo_VariableScopes_Function) { // Case: same function called from two places in same top-level function. // In this case, we expect the storage for the variable to be in the same - // place for both "instances" of the function: as a thread proceeds through - // the caller, it will write new values into the variable's storage during - // the second or subsequent invocations of the inlined function. + // place for both "instances" of the function: as a thread proceeds + // through the caller, it will write new values into the variable's + // storage during the second or subsequent invocations of the inlined + // function. DWORD instructionOffset = AdvanceUntilFunctionEntered(dxilDebugger, 0, L"CSMain"); @@ -3692,9 +3790,10 @@ void CSMain() // Case: same function called from two places in same top-level function. // In this case, we expect the storage for the variable to be in the same - // place for both "instances" of the function: as a thread proceeds through - // the caller, it will write new values into the variable's storage during - // the second or subsequent invocations of the inlined function. + // place for both "instances" of the function: as a thread proceeds + // through the caller, it will write new values into the variable's + // storage during the second or subsequent invocations of the inlined + // function. 
DWORD instructionOffset = AdvanceUntilFunctionEntered(dxilDebugger, 0, L"CSMain"); diff --git a/tools/clang/unittests/HLSL/PixTest.cpp b/tools/clang/unittests/HLSL/PixTest.cpp index e337d2951c..c032e9e872 100644 --- a/tools/clang/unittests/HLSL/PixTest.cpp +++ b/tools/clang/unittests/HLSL/PixTest.cpp @@ -119,7 +119,6 @@ class PixTest : public ::testing::Test { TEST_METHOD(AccessTracking_ModificationReport_SM66) TEST_METHOD(PixStructAnnotation_Lib_DualRaygen) - TEST_METHOD(PixStructAnnotation_Lib_RaygenAllocaStructAlignment) TEST_METHOD(PixStructAnnotation_Simple) TEST_METHOD(PixStructAnnotation_CopiedStruct) @@ -1221,7 +1220,6 @@ PixTest::TestableResults PixTest::TestStructAnnotationCase( #if 0 // handy for debugging auto disTextW = Disassemble(pAnnotatedContainer); - WEX::Logging::Log::Comment(disTextW.c_str()); #endif ModuleAndHangersOn moduleEtc(pAnnotatedContainer); @@ -1455,100 +1453,6 @@ void Raygen1() } } -TEST_F(PixTest, PixStructAnnotation_Lib_RaygenAllocaStructAlignment) { - if (m_ver.SkipDxilVersion(1, 5)) - return; - - const char *hlsl = R"( - -RaytracingAccelerationStructure Scene : register(t0, space0); -RWTexture2D RenderTarget : register(u0); - -struct SceneConstantBuffer -{ - float4x4 projectionToWorld; - float4 cameraPosition; - float4 lightPosition; - float4 lightAmbientColor; - float4 lightDiffuseColor; -}; - -ConstantBuffer g_sceneCB : register(b0); - -struct RayPayload -{ - float4 color; -}; - -inline void GenerateCameraRay(uint2 index, out float3 origin, out float3 direction) -{ - float2 xy = index + 0.5f; // center in the middle of the pixel. - float2 screenPos = xy;// / DispatchRaysDimensions().xy * 2.0 - 1.0; - - // Invert Y for DirectX-style coordinates. - screenPos.y = -screenPos.y; - - // Unproject the pixel coordinate into a ray. - float4 world = /*mul(*/float4(screenPos, 0, 1)/*, g_sceneCB.projectionToWorld)*/; - - //world.xyz /= world.w; - origin = world.xyz; //g_sceneCB.cameraPosition.xyz; - direction = float3(1,0,0);//normalize(world.xyz - origin); -} - -void RaygenCommon() -{ - float3 rayDir; - float3 origin; - - // Generate a ray for a camera pixel corresponding to an index from the dispatched 2D grid. - GenerateCameraRay(DispatchRaysIndex().xy, origin, rayDir); - - // Trace the ray. - // Set the ray's extents. - RayDesc ray; - ray.Origin = origin; - ray.Direction = rayDir; - // Set TMin to a non-zero small value to avoid aliasing issues due to floating - point errors. - // TMin should be kept small to prevent missing geometry at close contact areas. - ray.TMin = 0.001; - ray.TMax = 10000.0; - RayPayload payload = { float4(0, 0, 0, 0) }; - TraceRay(Scene, RAY_FLAG_CULL_BACK_FACING_TRIANGLES, ~0, 0, 1, 0, ray, payload); - - // Write the raytraced color to the output texture. - // RenderTarget[DispatchRaysIndex().xy] = payload.color; -} - -[shader("raygeneration")] -void Raygen() -{ - RaygenCommon(); -} -)"; - - auto Testables = TestStructAnnotationCase(hlsl, L"-Od", true, L"lib_6_6"); - - // Built-in type "RayDesc" has this structure: struct { float3 Origin; float - // TMin; float3 Direction; float TMax; } This is 8 floats, with members at - // offsets 0,3,4,7 respectively. 
- - auto FindAtLeastOneOf = [=](char const *name, uint32_t index) { - VERIFY_IS_TRUE(std::find_if(Testables.AllocaWrites.begin(), - Testables.AllocaWrites.end(), - [&name, &index](AllocaWrite const &aw) { - return 0 == strcmp(aw.memberName.c_str(), - name) && - aw.index == index; - }) != Testables.AllocaWrites.end()); - }; - - FindAtLeastOneOf("Origin.x", 0); - FindAtLeastOneOf("TMin", 3); - FindAtLeastOneOf("Direction.x", 4); - FindAtLeastOneOf("TMax", 7); -} - TEST_F(PixTest, PixStructAnnotation_Simple) { if (m_ver.SkipDxilVersion(1, 5)) return; @@ -3441,7 +3345,6 @@ void RaygenInternalName() // check that there are alloca writes that cover all of them. RayPayload // has four elements, and RayDesc has eight. std::array RayPayloadElementCoverage; - std::array RayDescElementCoverage; for (auto const &write : metaDataKeyToValue.allocaWrites) { // the whole point of the changes with this test is to separate vector @@ -3452,14 +3355,10 @@ void RaygenInternalName() if (findAlloca != metaDataKeyToValue.allocaDefinitions.end()) { if (findAlloca->second.count == 4) { RayPayloadElementCoverage[write.second.offset] = true; - } else if (findAlloca->second.count == 8) { - RayDescElementCoverage[write.second.offset] = true; } } } // Check that coverage for every element was emitted: for (auto const &b : RayPayloadElementCoverage) VERIFY_IS_TRUE(b); - for (auto const &b : RayDescElementCoverage) - VERIFY_IS_TRUE(b); } diff --git a/tools/clang/unittests/HLSL/ValidationTest.cpp b/tools/clang/unittests/HLSL/ValidationTest.cpp index 01f24e0227..980bf6c7c2 100644 --- a/tools/clang/unittests/HLSL/ValidationTest.cpp +++ b/tools/clang/unittests/HLSL/ValidationTest.cpp @@ -1488,7 +1488,7 @@ TEST_F(ValidationTest, StructBufGlobalCoherentAndCounter) { L"..\\DXILValidation\\struct_buf1.hlsl", "ps_6_0", "!\"buf2\", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false", "!\"buf2\", i32 0, i32 0, i32 1, i32 12, i1 true, i1 true", - "globallycoherent cannot be used with append/consume buffers: 'buf2'"); + "globallycoherent cannot be used on buffer with counter 'buf2'"); } TEST_F(ValidationTest, StructBufStrideAlign) { diff --git a/tools/clang/unittests/HLSLExec/CMakeLists.txt b/tools/clang/unittests/HLSLExec/CMakeLists.txt index 3878fa3f34..b490ac94e9 100644 --- a/tools/clang/unittests/HLSLExec/CMakeLists.txt +++ b/tools/clang/unittests/HLSLExec/CMakeLists.txt @@ -3,9 +3,13 @@ find_package(TAEF REQUIRED) find_package(D3D12 REQUIRED) # Used for ExecutionTest.cpp. 
+set(CMAKE_CXX_FLAGS "${CMAKE_CXX_FLAGS} /bigobj") + add_clang_library(ExecHLSLTests SHARED ExecutionTest.cpp ShaderOpTest.cpp + TableParameterHandler.cpp + LongVectors.cpp ExecHLSLTests.rc ) diff --git a/tools/clang/unittests/HLSLExec/ExecHLSLTests.rc b/tools/clang/unittests/HLSLExec/ExecHLSLTests.rc index 6f4659910c..29459ee825 100644 --- a/tools/clang/unittests/HLSLExec/ExecHLSLTests.rc +++ b/tools/clang/unittests/HLSLExec/ExecHLSLTests.rc @@ -1,3 +1,4 @@ #include -ShaderOpArithTable.xml DATASOURCE_XML "ShaderOpArithTable.xml" \ No newline at end of file +ShaderOpArithTable.xml DATASOURCE_XML "ShaderOpArithTable.xml" +LongVectorOpTable.xml DATASOURCE_XML "LongVectorOpTable.xml" diff --git a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp index 6db27d7a41..586c55328d 100644 --- a/tools/clang/unittests/HLSLExec/ExecutionTest.cpp +++ b/tools/clang/unittests/HLSLExec/ExecutionTest.cpp @@ -11,7 +11,7 @@ /////////////////////////////////////////////////////////////////////////////// // We need to keep & fix these warnings to integrate smoothly with HLK -#pragma warning(error : 4100 4146 4242 4244 4267 4701 4389 4018) +#pragma warning(error : 4100 4242 4244 4267 4701 4389 4018) // *** THIS FILE CANNOT TAKE ANY LLVM DEPENDENCIES *** // @@ -60,6 +60,8 @@ #include "ShaderOpTest.h" #include #include +#include "TableParameterHandler.h" +#include "HlslExecTestUtils.h" // clang-format on #pragma comment(lib, "d3dcompiler.lib") @@ -67,47 +69,6 @@ #pragma comment(lib, "dxguid.lib") #pragma comment(lib, "version.lib") -// A more recent Windows SDK than currently required is needed for these. -typedef HRESULT(WINAPI *D3D12EnableExperimentalFeaturesFn)( - UINT NumFeatures, __in_ecount(NumFeatures) const IID *pIIDs, - __in_ecount_opt(NumFeatures) void *pConfigurationStructs, - __in_ecount_opt(NumFeatures) UINT *pConfigurationStructSizes); - -static const GUID D3D12ExperimentalShaderModelsID = - {/* 76f5573e-f13a-40f5-b297-81ce9e18933f */ - 0x76f5573e, - 0xf13a, - 0x40f5, - {0xb2, 0x97, 0x81, 0xce, 0x9e, 0x18, 0x93, 0x3f}}; - -// Used to create D3D12SDKConfiguration to enable AgilitySDK programmatically. -typedef HRESULT(WINAPI *D3D12GetInterfaceFn)(REFCLSID rclsid, REFIID riid, - void **ppvDebug); - -#ifndef __ID3D12SDKConfiguration_INTERFACE_DEFINED__ -// Copied from AgilitySDK D3D12.h to programmatically enable when in developer -// mode. -#define __ID3D12SDKConfiguration_INTERFACE_DEFINED__ - -EXTERN_C const GUID DECLSPEC_SELECTANY IID_ID3D12SDKConfiguration = { - 0xe9eb5314, - 0x33aa, - 0x42b2, - {0xa7, 0x18, 0xd7, 0x7f, 0x58, 0xb1, 0xf1, 0xc7}}; -EXTERN_C const GUID DECLSPEC_SELECTANY CLSID_D3D12SDKConfiguration = { - 0x7cda6aca, - 0xa03e, - 0x49c8, - {0x94, 0x58, 0x03, 0x34, 0xd2, 0x0e, 0x07, 0xce}}; - -MIDL_INTERFACE("e9eb5314-33aa-42b2-a718-d77f58b1f1c7") -ID3D12SDKConfiguration : public IUnknown { -public: - virtual HRESULT STDMETHODCALLTYPE SetSDKVersion(UINT SDKVersion, - LPCSTR SDKPath) = 0; -}; -#endif /* __ID3D12SDKConfiguration_INTERFACE_DEFINED__ */ - using namespace DirectX; using namespace hlsl_test; @@ -271,9 +232,6 @@ typedef struct D3D12_FEATURE_DATA_D3D12_OPTIONS4 { #endif -// Virtual class to compute the expected result given a set of inputs -struct TableParameter; - class ExecutionTest { public: BEGIN_TEST_CLASS(ExecutionTest) @@ -519,10 +477,10 @@ class ExecutionTest { return false; // Do not: FreeLibrary(hRuntime); // If we actually free the library, it defeats the purpose of - // EnableAgilitySDK and EnableExperimentalMode. 
+ // enableAgilitySDK and enableExperimentalMode. HRESULT hr; - hr = EnableAgilitySDK(hRuntime); + hr = enableAgilitySDK(hRuntime); if (FAILED(hr)) { LogCommentFmt(L"Unable to enable Agility SDK - 0x%08x.", hr); } else if (hr == S_FALSE) { @@ -531,7 +489,7 @@ class ExecutionTest { LogCommentFmt(L"Agility SDK enabled."); } - hr = EnableExperimentalMode(hRuntime); + hr = enableExperimentalMode(hRuntime); if (FAILED(hr)) { LogCommentFmt(L"Unable to enable shader experimental mode - 0x%08x.", hr); @@ -541,7 +499,7 @@ class ExecutionTest { LogCommentFmt(L"Experimental mode enabled."); } - hr = EnableDebugLayer(); + hr = enableDebugLayer(); if (FAILED(hr)) { LogCommentFmt(L"Unable to enable debug layer - 0x%08x.", hr); } else if (hr == S_FALSE) { @@ -602,41 +560,31 @@ class ExecutionTest { // Do not remove the following line - it is used by TranslateExecutionTest.py // MARKER: ExecutionTest/DxilConf Shared Implementation Start - // This is defined in d3d.h for Windows 10 Anniversary Edition SDK, but we - // only require the Windows 10 SDK. - typedef enum D3D_SHADER_MODEL { - D3D_SHADER_MODEL_5_1 = 0x51, - D3D_SHADER_MODEL_6_0 = 0x60, - D3D_SHADER_MODEL_6_1 = 0x61, - D3D_SHADER_MODEL_6_2 = 0x62, - D3D_SHADER_MODEL_6_3 = 0x63, - D3D_SHADER_MODEL_6_4 = 0x64, - D3D_SHADER_MODEL_6_5 = 0x65, - D3D_SHADER_MODEL_6_6 = 0x66, - D3D_SHADER_MODEL_6_7 = 0x67, - D3D_SHADER_MODEL_6_8 = 0x68, - D3D_SHADER_MODEL_6_9 = 0x69, - } D3D_SHADER_MODEL; - - static const D3D_SHADER_MODEL HIGHEST_SHADER_MODEL = D3D_SHADER_MODEL_6_9; - - bool UseDxbc() { -#ifdef _HLK_CONF - return false; -#else - return GetTestParamBool(L"DXBC"); -#endif - } - - bool UseWarpByDefault() { -#ifdef _HLK_CONF - return false; -#else - return true; -#endif - } - - bool UseDebugIfaces() { return true; } + // We define D3D_SHADER_MODEL enum values as we don't generally have access to + // the latest D3D headers when adding tests for a new SM being added. + using D3D_SHADER_MODEL = ExecTestUtils::D3D_SHADER_MODEL; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_0 = + ExecTestUtils::D3D_SHADER_MODEL_6_0; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_1 = + ExecTestUtils::D3D_SHADER_MODEL_6_1; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_2 = + ExecTestUtils::D3D_SHADER_MODEL_6_2; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_3 = + ExecTestUtils::D3D_SHADER_MODEL_6_3; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_4 = + ExecTestUtils::D3D_SHADER_MODEL_6_4; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_5 = + ExecTestUtils::D3D_SHADER_MODEL_6_5; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_6 = + ExecTestUtils::D3D_SHADER_MODEL_6_6; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_7 = + ExecTestUtils::D3D_SHADER_MODEL_6_7; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_8 = + ExecTestUtils::D3D_SHADER_MODEL_6_8; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_SHADER_MODEL_6_9 = + ExecTestUtils::D3D_SHADER_MODEL_6_9; + static constexpr ExecTestUtils::D3D_SHADER_MODEL D3D_HIGHEST_SHADER_MODEL = + ExecTestUtils::D3D_HIGHEST_SHADER_MODEL; bool SaveImages() { return GetTestParamBool(L"SaveImages"); } @@ -766,7 +714,7 @@ class ExecutionTest { CComPtr pComputeShader; // Load and compile shaders. 
- if (UseDxbc()) { + if (useDxbc()) { #ifndef _HLK_CONF DXBCFromText(pShader, L"main", pTargetProfile, &pComputeShader); #endif @@ -784,112 +732,6 @@ class ExecutionTest { &computePsoDesc, IID_PPV_ARGS(ppComputeState))); } - bool CreateDevice(ID3D12Device **ppDevice, - D3D_SHADER_MODEL testModel = D3D_SHADER_MODEL_6_0, - bool skipUnsupported = true) { - if (testModel > HIGHEST_SHADER_MODEL) { - UINT minor = (UINT)testModel & 0x0f; - LogCommentFmt(L"Installed SDK does not support " - L"shader model 6.%1u", - minor); - - if (skipUnsupported) { - WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); - } - - return false; - } - CComPtr factory; - CComPtr pDevice; - - *ppDevice = nullptr; - - VERIFY_SUCCEEDED(CreateDXGIFactory1(IID_PPV_ARGS(&factory))); - if (GetTestParamUseWARP(UseWarpByDefault())) { - CComPtr warpAdapter; - VERIFY_SUCCEEDED(factory->EnumWarpAdapter(IID_PPV_ARGS(&warpAdapter))); - HRESULT createHR = D3D12CreateDevice(warpAdapter, D3D_FEATURE_LEVEL_11_0, - IID_PPV_ARGS(&pDevice)); - if (FAILED(createHR)) { - LogCommentFmt(L"The available version of WARP does not support d3d12."); - - if (skipUnsupported) { - WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); - } - - return false; - } - - if (GetModuleHandleW(L"d3d10warp.dll") != NULL) { - WCHAR szFullModuleFilePath[MAX_PATH] = L""; - GetModuleFileNameW(GetModuleHandleW(L"d3d10warp.dll"), - szFullModuleFilePath, sizeof(szFullModuleFilePath)); - WEX::Logging::Log::Comment(WEX::Common::String().Format( - L"WARP driver loaded from: %S", szFullModuleFilePath)); - } - - } else { - CComPtr hardwareAdapter; - WEX::Common::String AdapterValue; - HRESULT hr = WEX::TestExecution::RuntimeParameters::TryGetValue( - L"Adapter", AdapterValue); - if (SUCCEEDED(hr)) { - st::GetHardwareAdapter(factory, AdapterValue, &hardwareAdapter); - } else { - WEX::Logging::Log::Comment( - L"Using default hardware adapter with D3D12 support."); - } - - VERIFY_SUCCEEDED(D3D12CreateDevice( - hardwareAdapter, D3D_FEATURE_LEVEL_11_0, IID_PPV_ARGS(&pDevice))); - } - // retrieve adapter information - LUID adapterID = pDevice->GetAdapterLuid(); - CComPtr adapter; - factory->EnumAdapterByLuid(adapterID, IID_PPV_ARGS(&adapter)); - DXGI_ADAPTER_DESC AdapterDesc; - VERIFY_SUCCEEDED(adapter->GetDesc(&AdapterDesc)); - LogCommentFmt(L"Using Adapter:%s", AdapterDesc.Description); - - if (pDevice == nullptr) - return false; - - if (!UseDxbc()) { - // Check for DXIL support. 
- typedef struct D3D12_FEATURE_DATA_SHADER_MODEL { - D3D_SHADER_MODEL HighestShaderModel; - } D3D12_FEATURE_DATA_SHADER_MODEL; - const UINT D3D12_FEATURE_SHADER_MODEL = 7; - D3D12_FEATURE_DATA_SHADER_MODEL SMData; - SMData.HighestShaderModel = testModel; - if (FAILED(pDevice->CheckFeatureSupport( - (D3D12_FEATURE)D3D12_FEATURE_SHADER_MODEL, &SMData, - sizeof(SMData))) || - SMData.HighestShaderModel < testModel) { - UINT minor = (UINT)testModel & 0x0f; - LogCommentFmt(L"The selected device does not support " - L"shader model 6.%1u", - minor); - - if (skipUnsupported) { - WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); - } - - return false; - } - } - - if (UseDebugIfaces()) { - CComPtr pInfoQueue; - if (SUCCEEDED(pDevice->QueryInterface(&pInfoQueue))) { - pInfoQueue->SetMuteDebugOutput(FALSE); - } - } - - *ppDevice = pDevice.Detach(); - return true; - } - void CreateGraphicsCommandQueue(ID3D12Device *pDevice, ID3D12CommandQueue **ppCommandQueue) { D3D12_COMMAND_QUEUE_DESC queueDesc = {}; @@ -919,7 +761,7 @@ class ExecutionTest { CComPtr vertexShader; CComPtr pixelShader; - if (UseDxbc()) { + if (useDxbc()) { #ifndef _HLK_CONF DXBCFromText(pShaders, L"VSMain", L"vs_6_0", &vertexShader); DXBCFromText(pShaders, L"PSMain", L"ps_6_0", &pixelShader); @@ -1642,7 +1484,7 @@ class ExecutionTest { // The debug layer does net yet validate DXIL programs that require // rewriting, but basic logging should work properly. HRESULT hr = S_FALSE; - if (UseDebugIfaces()) { + if (useDebugIfaces()) { CComPtr debugController; hr = D3D12GetDebugInterface(IID_PPV_ARGS(&debugController)); if (SUCCEEDED(hr)) { @@ -1830,20 +1672,6 @@ class ExecutionTest { } } - void ReadHlslDataIntoNewStream(LPCWSTR relativePath, IStream **ppStream) { - VERIFY_SUCCEEDED(m_support.Initialize()); - CComPtr pLibrary; - CComPtr pBlob; - CComPtr pStream; - std::wstring path = GetPathToHlslDataFile(relativePath, HLSLDATAFILEPARAM, - DEFAULT_EXEC_TEST_DIR); - VERIFY_SUCCEEDED(m_support.CreateInstance(CLSID_DxcLibrary, &pLibrary)); - VERIFY_SUCCEEDED( - pLibrary->CreateBlobFromFile(path.c_str(), nullptr, &pBlob)); - VERIFY_SUCCEEDED(pLibrary->CreateStreamFromBlobReadOnly(pBlob, &pStream)); - *ppStream = pStream.Detach(); - } - void RecordRenderAndReadback(ID3D12GraphicsCommandList *pList, ID3D12DescriptorHeap *pRtvHeap, UINT rtvDescriptorSize, UINT instanceCount, @@ -2348,15 +2176,15 @@ TEST_F(ExecutionTest, LifetimeIntrinsicTest) { static const int DispatchGroupCount = 1; CComPtr pDevice; - bool bSM_6_6_Supported = CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6, false); + bool bSM_6_6_Supported = createDevice(&pDevice, D3D_SHADER_MODEL_6_6, false); bool bSM_6_3_Supported = bSM_6_6_Supported; if (!bSM_6_6_Supported) { // Try 6.3 for downlevel DXR case - bSM_6_3_Supported = CreateDevice(&pDevice, D3D_SHADER_MODEL_6_3, false); + bSM_6_3_Supported = createDevice(&pDevice, D3D_SHADER_MODEL_6_3, false); } if (!bSM_6_3_Supported) { // Otherwise, 6.0 better be supported for compute case - VERIFY_IS_TRUE(CreateDevice(&pDevice, D3D_SHADER_MODEL_6_0, false)); + VERIFY_IS_TRUE(createDevice(&pDevice, D3D_SHADER_MODEL_6_0, false)); } bool bDXRSupported = bSM_6_3_Supported && DoesDeviceSupportRayTracing(pDevice); @@ -2465,7 +2293,7 @@ TEST_F(ExecutionTest, BasicComputeTest) { static const int DispatchGroupCount = 1; CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; std::vector values; @@ -2524,7 +2352,7 @@ TEST_F(ExecutionTest, BasicTriangleTest) { " return 1; //input.color;\r\n" "};\r\n"; - if 
(!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; struct BasicTestChecker { @@ -2668,7 +2496,7 @@ TEST_F(ExecutionTest, Int64Test) { static const int DispatchGroupCount = 1; CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; if (!DoesDeviceSupportInt64(pDevice)) { @@ -2693,7 +2521,7 @@ TEST_F(ExecutionTest, SignTest) { "}"; CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; const uint32_t neg1 = (uint32_t)-1; @@ -2714,7 +2542,7 @@ TEST_F(ExecutionTest, SignTest) { TEST_F(ExecutionTest, WaveIntrinsicsDDITest) { #ifndef _HLK_CONF CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; D3D12_FEATURE_DATA_D3D12_OPTIONS1 O; if (FAILED(pDevice->CheckFeatureSupport( @@ -2814,7 +2642,7 @@ TEST_F(ExecutionTest, WaveIntrinsicsTest) { static const int DispatchGroupCount = 1; CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; if (!DoesDeviceSupportWaveOps(pDevice)) { @@ -2841,7 +2669,7 @@ TEST_F(ExecutionTest, WaveIntrinsicsTest) { CComPtr pUavHeap; CComPtr pCommandAllocator; FenceObj FO; - bool dxbc = UseDxbc(); + bool dxbc = useDxbc(); const size_t valueSizeInBytes = values.size() * sizeof(PerThreadData); CreateComputeCommandQueue(pDevice, L"WaveIntrinsicsTest Command Queue", @@ -3172,7 +3000,7 @@ TEST_F(ExecutionTest, WaveIntrinsicsInPSTest) { CComPtr pVertexBuffer; D3D12_VERTEX_BUFFER_VIEW vertexBufferView; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; if (!DoesDeviceSupportWaveOps(pDevice)) { // Optional feature, so it's correct to not support it if declared as such. @@ -3229,7 +3057,7 @@ TEST_F(ExecutionTest, WaveIntrinsicsInPSTest) { CreateVertexBuffer(pDevice, vertices, &pVertexBuffer, &vertexBufferView); - bool dxbc = UseDxbc(); + bool dxbc = useDxbc(); // Set up UAV resource. std::vector values; @@ -3491,12 +3319,6 @@ TEST_F(ExecutionTest, WaveIntrinsicsInPSTest) { } } -struct ShaderOpTestResult { - st::ShaderOp *ShaderOp; - std::shared_ptr ShaderOpSet; - std::shared_ptr Test; -}; - struct SPrimitives { float f_float; float f_float2; @@ -3504,87 +3326,19 @@ struct SPrimitives { float f_float2_o; }; -std::shared_ptr -RunShaderOpTestAfterParse(ID3D12Device *pDevice, dxc::DxcDllSupport &support, - LPCSTR pName, - st::ShaderOpTest::TInitCallbackFn pInitCallback, - st::ShaderOpTest::TShaderCallbackFn pShaderCallback, - std::shared_ptr ShaderOpSet) { - st::ShaderOp *pShaderOp; - if (pName == nullptr) { - if (ShaderOpSet->ShaderOps.size() != 1) { - VERIFY_FAIL(L"Expected a single shader operation."); - } - pShaderOp = ShaderOpSet->ShaderOps[0].get(); - } else { - pShaderOp = ShaderOpSet->GetShaderOp(pName); - } - if (pShaderOp == nullptr) { - std::string msg = "Unable to find shader op "; - msg += pName; - msg += "; available ops"; - const char sep = ':'; - for (auto &pAvailOp : ShaderOpSet->ShaderOps) { - msg += sep; - msg += pAvailOp->Name ? pAvailOp->Name : "[n/a]"; - } - CA2W msgWide(msg.c_str()); - VERIFY_FAIL(msgWide.m_psz); - } - - // This won't actually be used since we're supplying the device, - // but let's make it consistent. 
- pShaderOp->UseWarpDevice = GetTestParamUseWARP(true); - - std::shared_ptr test = std::make_shared(); - test->SetDxcSupport(&support); - test->SetInitCallback(pInitCallback); - test->SetShaderCallback(pShaderCallback); - test->SetDevice(pDevice); - test->RunShaderOp(pShaderOp); - - std::shared_ptr result = - std::make_shared(); - result->ShaderOpSet = ShaderOpSet; - result->Test = test; - result->ShaderOp = pShaderOp; - return result; -} - -std::shared_ptr -RunShaderOpTestAfterParse(ID3D12Device *pDevice, dxc::DxcDllSupport &support, - LPCSTR pName, - st::ShaderOpTest::TInitCallbackFn pInitCallback, - std::shared_ptr ShaderOpSet) { - return RunShaderOpTestAfterParse(pDevice, support, pName, pInitCallback, - nullptr, ShaderOpSet); -} - -std::shared_ptr -RunShaderOpTest(ID3D12Device *pDevice, dxc::DxcDllSupport &support, - IStream *pStream, LPCSTR pName, - st::ShaderOpTest::TInitCallbackFn pInitCallback) { - DXASSERT_NOMSG(pStream != nullptr); - std::shared_ptr ShaderOpSet = - std::make_shared(); - st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); - return RunShaderOpTestAfterParse(pDevice, support, pName, pInitCallback, - ShaderOpSet); -} - TEST_F(ExecutionTest, OutOfBoundsTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); // Single operation test at the moment. CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; - std::shared_ptr test = - RunShaderOpTest(pDevice, m_support, pStream, "OOB", nullptr); + std::shared_ptr test = + st::RunShaderOpTest(pDevice, m_support, pStream, "OOB", nullptr); MappedData data; // Read back to CPU and examine contents - should get pure red. { @@ -3601,15 +3355,15 @@ TEST_F(ExecutionTest, SaturateTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); // Single operation test at the moment. CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; - std::shared_ptr test = - RunShaderOpTest(pDevice, m_support, pStream, "Saturate", nullptr); + std::shared_ptr test = + st::RunShaderOpTest(pDevice, m_support, pStream, "Saturate", nullptr); MappedData data; test->Test->GetReadBackData("U0", &data); const float *pValues = (float *)data.data(); @@ -3636,11 +3390,11 @@ void ExecutionTest::BasicTriangleTestSetup(LPCSTR ShaderOpName, WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); // Single operation test at the moment. 
CComPtr pDevice; - if (!CreateDevice(&pDevice, testModel)) + if (!createDevice(&pDevice, testModel)) return; // As this is used, 6.2 requirement always comes with requiring native 16-bit @@ -3653,8 +3407,8 @@ void ExecutionTest::BasicTriangleTestSetup(LPCSTR ShaderOpName, return; } - std::shared_ptr test = - RunShaderOpTest(pDevice, m_support, pStream, ShaderOpName, nullptr); + std::shared_ptr test = + st::RunShaderOpTest(pDevice, m_support, pStream, ShaderOpName, nullptr); MappedData data; D3D12_RESOURCE_DESC &D = test->ShaderOp->GetResourceByName("RTarget")->Desc; UINT width = (UINT)D.Width; @@ -3786,14 +3540,14 @@ TEST_F(ExecutionTest, PartialDerivTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; - std::shared_ptr test = - RunShaderOpTest(pDevice, m_support, pStream, "DerivFine", nullptr); + std::shared_ptr test = + st::RunShaderOpTest(pDevice, m_support, pStream, "DerivFine", nullptr); MappedData data; D3D12_RESOURCE_DESC &D = test->ShaderOp->GetResourceByName("RTarget")->Desc; UINT width = (UINT)D.Width; @@ -3894,10 +3648,10 @@ TEST_F(ExecutionTest, DerivativesTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; std::shared_ptr ShaderOpSet = @@ -3977,10 +3731,10 @@ TEST_F(ExecutionTest, QuadReadTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; if (!DoesDeviceSupportWaveOps(pDevice)) { @@ -4033,8 +3787,9 @@ TEST_F(ExecutionTest, QuadReadTest) { // Test Compute Shader pShaderOp->CS = CS; - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "QuadRead", nullptr, ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, "QuadRead", nullptr, + ShaderOpSet); MappedData data; test->Test->GetReadBackData("U0", &data); @@ -4055,8 +3810,8 @@ TEST_F(ExecutionTest, QuadReadTest) { // Disable CS so mesh goes forward pShaderOp->CS = nullptr; - test = RunShaderOpTestAfterParse(pDevice, m_support, "QuadRead", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "QuadRead", + nullptr, ShaderOpSet); test->Test->GetReadBackData("U1", &data); pPixels = (UINT *)data.data(); // Test first, second and center quads @@ -4124,10 +3879,10 @@ TEST_F(ExecutionTest, ComputeSampleTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; std::shared_ptr 
ShaderOpSet = @@ -4175,7 +3930,7 @@ TEST_F(ExecutionTest, ComputeSampleTest) { } // Test 1D compute shader - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "ComputeSample", SampleInitFn, ShaderOpSet); MappedData data; @@ -4190,8 +3945,8 @@ TEST_F(ExecutionTest, ComputeSampleTest) { pShaderOp->CS = CS2; test.reset(); - test = RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", - SampleInitFn, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", + SampleInitFn, ShaderOpSet); test->Test->GetReadBackData("U0", &data); pPixels = (UINT *)data.data(); @@ -4203,8 +3958,8 @@ TEST_F(ExecutionTest, ComputeSampleTest) { if (DoesDeviceSupportMeshAmpDerivatives(pDevice)) { // Disable CS so mesh goes forward pShaderOp->CS = nullptr; - test = RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", - SampleInitFn, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", + SampleInitFn, ShaderOpSet); test->Test->GetReadBackData("U1", &data); pPixels = (UINT *)data.data(); @@ -4221,8 +3976,8 @@ TEST_F(ExecutionTest, ComputeSampleTest) { pShaderOp->AS = AS2; pShaderOp->MS = MS2; - test = RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", - SampleInitFn, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "ComputeSample", + SampleInitFn, ShaderOpSet); test->Test->GetReadBackData("U1", &data); pPixels = (UINT *)data.data(); @@ -4251,7 +4006,7 @@ TEST_F(ExecutionTest, ATOWriteMSAATest) { #else D3D_SHADER_MODEL sm = D3D_SHADER_MODEL_6_7; #endif - if (!CreateDevice(&pDevice, sm)) + if (!createDevice(&pDevice, sm)) return; #ifndef WRITEMSAA_FALLBACK @@ -4517,7 +4272,7 @@ TEST_F(ExecutionTest, ATOProgOffset) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); std::shared_ptr ShaderOpSet = std::make_shared(); @@ -4550,7 +4305,7 @@ TEST_F(ExecutionTest, ATOProgOffset) { D3D_SHADER_MODEL sm = TestShaderModels[i]; CComPtr pDevice; - if (!CreateDevice(&pDevice, sm, /*skipUnsupported*/ false)) { + if (!createDevice(&pDevice, sm, /*skipUnsupported*/ false)) { LogCommentFmt(L"Device does not support shader model 6.%1u", ((UINT)sm & 0x0f)); break; @@ -4603,8 +4358,9 @@ TEST_F(ExecutionTest, ATOProgOffset) { } // Test compute shader - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "ProgOffset", SampleInitFn, ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", + SampleInitFn, ShaderOpSet); MappedData data; test->Test->GetReadBackData("U0", &data); @@ -4614,8 +4370,8 @@ TEST_F(ExecutionTest, ATOProgOffset) { pShaderOp->CS = nullptr; if (DoesDeviceSupportMeshShaders(pDevice)) { - test = RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", - SampleInitFn, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", + SampleInitFn, ShaderOpSet); // PS test->Test->GetReadBackData("U0", &data); @@ -4632,8 +4388,8 @@ TEST_F(ExecutionTest, ATOProgOffset) { // Disable MS so PS goes forward pShaderOp->MS = nullptr; - test = RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", - SampleInitFn, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "ProgOffset", + SampleInitFn, ShaderOpSet); 
test->Test->GetReadBackData("U0", &data); VerifyProgOffsetResults((UINT *)data.data(), true); @@ -4653,10 +4409,10 @@ TEST_F(ExecutionTest, ATOSampleCmpLevelTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_7)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_7)) return; if (!DoesDeviceSupportAdvancedTexOps(pDevice)) { @@ -4701,7 +4457,7 @@ TEST_F(ExecutionTest, ATOSampleCmpLevelTest) { }; // Test compute shader - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "SampleCmpLevel", SampleInitFn, ShaderOpSet); MappedData data; @@ -4718,8 +4474,8 @@ TEST_F(ExecutionTest, ATOSampleCmpLevelTest) { if (DoesDeviceSupportMeshShaders(pDevice)) { // Disable CS so mesh goes forward pShaderOp->CS = nullptr; - test = RunShaderOpTestAfterParse(pDevice, m_support, "SampleCmpLevel", - SampleInitFn, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "SampleCmpLevel", + SampleInitFn, ShaderOpSet); test->Test->GetReadBackData("U0", &data); pPixels = (UINT *)data.data(); @@ -5298,7 +5054,7 @@ TEST_F(ExecutionTest, ATORawGather) { D3D_SHADER_MODEL sm = D3D_SHADER_MODEL_6_7; #endif CComPtr pDevice; - if (!CreateDevice(&pDevice, sm)) + if (!createDevice(&pDevice, sm)) return; #ifndef RAWGATHER_FALLBACK @@ -5528,7 +5284,7 @@ void ExecutionTest::RunBasicShaderModelTest(D3D_SHADER_MODEL shaderModel) { WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pDevice; - if (!CreateDevice(&pDevice, shaderModel)) { + if (!createDevice(&pDevice, shaderModel)) { return; } @@ -5628,9 +5384,9 @@ void ExecutionTest::RunBasicShaderModelTest(CComPtr pDevice, }; CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", // this callback is called when the test is creating the resource to run // the test @@ -5958,178 +5714,6 @@ struct SPackUnpackOpOutUnpacked { std::array outputClampedUint16; std::array outputClampedInt16; }; - -// Parameter representation for taef data-driven tests -struct TableParameter { - LPCWSTR m_name; - enum TableParameterType { - INT8, - INT16, - INT32, - UINT, - FLOAT, - HALF, - DOUBLE, - STRING, - BOOL, - INT8_TABLE, - INT16_TABLE, - INT32_TABLE, - FLOAT_TABLE, - HALF_TABLE, - DOUBLE_TABLE, - STRING_TABLE, - UINT8_TABLE, - UINT16_TABLE, - UINT32_TABLE, - BOOL_TABLE - }; - TableParameter(LPCWSTR name, TableParameterType type, bool required) - : m_name(name), m_type(type), m_required(required) {} - TableParameterType m_type; - bool m_required; // required parameter - int8_t m_int8; - int16_t m_int16; - int m_int32; - unsigned int m_uint; - float m_float; - uint16_t m_half; // no such thing as half type in c++. 
Use int16 instead - double m_double; - bool m_bool; - WEX::Common::String m_str; - std::vector m_int8Table; - std::vector m_int16Table; - std::vector m_int32Table; - std::vector m_uint8Table; - std::vector m_uint16Table; - std::vector m_uint32Table; - std::vector m_floatTable; - std::vector m_halfTable; // no such thing as half type in c++ - std::vector m_doubleTable; - std::vector m_boolTable; - std::vector m_StringTable; -}; - -class TableParameterHandler { -private: - HRESULT ParseTableRow(); - -public: - TableParameter *m_table; - size_t m_tableSize; - TableParameterHandler(TableParameter *pTable, size_t size) - : m_table(pTable), m_tableSize(size) { - clearTableParameter(); - VERIFY_SUCCEEDED(ParseTableRow()); - } - - TableParameter *GetTableParamByName(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &m_table[i]; - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - void clearTableParameter() { - for (size_t i = 0; i < m_tableSize; ++i) { - m_table[i].m_int32 = 0; - m_table[i].m_uint = 0; - m_table[i].m_double = 0; - m_table[i].m_bool = false; - m_table[i].m_str = WEX::Common::String(); - } - } - - template std::vector *GetDataArray(LPCWSTR name) { - return nullptr; - } - - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_int32Table); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_int8Table); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_int16Table); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_uint32Table); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_floatTable); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - // TODO: uin16_t may be used to represent two different types when we - // introduce uint16 - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_halfTable); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return &(m_table[i].m_doubleTable); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } - - template <> std::vector *GetDataArray(LPCWSTR name) { - for (size_t i = 0; i < m_tableSize; ++i) { - if (_wcsicmp(name, m_table[i].m_name) == 0) { - return 
&(m_table[i].m_boolTable); - } - } - DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); - return nullptr; - } -}; - static TableParameter UnaryFPOpParameters[] = { {L"ShaderOp.Target", TableParameter::STRING, true}, {L"ShaderOp.Text", TableParameter::STRING, true}, @@ -6460,381 +6044,6 @@ static TableParameter PackUnpackOpParameters[] = { {L"Validation.Input", TableParameter::UINT32_TABLE, true}, }; -static bool IsHexString(PCWSTR str, uint16_t *value) { - std::wstring wString(str); - wString.erase(std::remove(wString.begin(), wString.end(), L' '), - wString.end()); - LPCWSTR wstr = wString.c_str(); - if (wcsncmp(wstr, L"0x", 2) == 0 || wcsncmp(wstr, L"0b", 2) == 0) { - *value = (uint16_t)wcstol(wstr, NULL, 0); - return true; - } - return false; -} - -static HRESULT ParseDataToFloat(PCWSTR str, float &value) { - std::wstring wString(str); - wString.erase(std::remove(wString.begin(), wString.end(), L' '), - wString.end()); - wString.erase(std::remove(wString.begin(), wString.end(), L'\n'), - wString.end()); - PCWSTR wstr = wString.data(); - if (_wcsicmp(wstr, L"NaN") == 0) { - value = NAN; - } else if (_wcsicmp(wstr, L"-inf") == 0) { - value = -(INFINITY); - } else if (_wcsicmp(wstr, L"inf") == 0) { - value = INFINITY; - } else if (_wcsicmp(wstr, L"-denorm") == 0) { - value = -(FLT_MIN / 2); - } else if (_wcsicmp(wstr, L"denorm") == 0) { - value = FLT_MIN / 2; - } else if (_wcsicmp(wstr, L"-0.0f") == 0 || _wcsicmp(wstr, L"-0.0") == 0 || - _wcsicmp(wstr, L"-0") == 0) { - value = -0.0f; - } else if (_wcsicmp(wstr, L"0.0f") == 0 || _wcsicmp(wstr, L"0.0") == 0 || - _wcsicmp(wstr, L"0") == 0) { - value = 0.0f; - } else if (_wcsnicmp(wstr, L"0x", 2) == - 0) { // For hex values, take values literally - unsigned temp_i = std::stoul(wstr, nullptr, 16); - value = (float &)temp_i; - } else { - // evaluate the expression of wstring - double val = _wtof(wstr); - if (val == 0) { - LogErrorFmt(L"Failed to parse parameter %s to float", wstr); - return E_FAIL; - } - value = (float)val; - } - return S_OK; -} - -static HRESULT ParseDataToUint(PCWSTR str, unsigned int &value) { - std::wstring wString(str); - wString.erase(std::remove(wString.begin(), wString.end(), L' '), - wString.end()); - PCWSTR wstr = wString.data(); - // evaluate the expression of string - if (_wcsicmp(wstr, L"0") == 0 || _wcsicmp(wstr, L"0x00000000") == 0) { - value = 0; - return S_OK; - } - wchar_t *end; - unsigned int val = std::wcstoul(wstr, &end, 0); - if (val == 0) { - LogErrorFmt(L"Failed to parse parameter %s to int", wstr); - return E_FAIL; - } - value = val; - return S_OK; -} - -static HRESULT ParseDataToVectorFloat(PCWSTR str, float *ptr, size_t count) { - std::wstring wstr(str); - size_t curPosition = 0; - // parse a string of dot product separated by commas - for (size_t i = 0; i < count; ++i) { - size_t nextPosition = wstr.find(L",", curPosition); - if (FAILED(ParseDataToFloat( - wstr.substr(curPosition, nextPosition - curPosition).data(), - *(ptr + i)))) { - return E_FAIL; - } - curPosition = nextPosition + 1; - } - return S_OK; -} - -static HRESULT ParseDataToVectorHalf(PCWSTR str, uint16_t *ptr, size_t count) { - std::wstring wstr(str); - size_t curPosition = 0; - // parse a string of dot product separated by commas - for (size_t i = 0; i < count; ++i) { - size_t nextPosition = wstr.find(L",", curPosition); - float floatValue; - if (FAILED(ParseDataToFloat( - wstr.substr(curPosition, nextPosition - curPosition).data(), - floatValue))) { - return E_FAIL; - } - *(ptr + i) = ConvertFloat32ToFloat16(floatValue); - 
curPosition = nextPosition + 1; - } - return S_OK; -} - -static HRESULT ParseDataToVectorUint(PCWSTR str, unsigned int *ptr, - size_t count) { - std::wstring wstr(str); - size_t curPosition = 0; - // parse a string of dot product separated by commas - for (size_t i = 0; i < count; ++i) { - size_t nextPosition = wstr.find(L",", curPosition); - if (FAILED(ParseDataToUint( - wstr.substr(curPosition, nextPosition - curPosition).data(), - *(ptr + i)))) { - return E_FAIL; - } - curPosition = nextPosition + 1; - } - return S_OK; -} - -HRESULT TableParameterHandler::ParseTableRow() { - TableParameter *table = m_table; - for (unsigned int i = 0; i < m_tableSize; ++i) { - switch (table[i].m_type) { - case TableParameter::INT8: - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - table[i].m_int32)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int16 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_int8 = (int8_t)(table[i].m_int32); - break; - case TableParameter::INT16: - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - table[i].m_int32)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int16 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_int16 = (short)(table[i].m_int32); - break; - case TableParameter::INT32: - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - table[i].m_int32)) && - table[i].m_required) { - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - break; - case TableParameter::UINT: - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - table[i].m_uint)) && - table[i].m_required) { - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - break; - case TableParameter::DOUBLE: - if (FAILED(WEX::TestExecution::TestData::TryGetValue( - table[i].m_name, table[i].m_double)) && - table[i].m_required) { - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - break; - case TableParameter::STRING: - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - table[i].m_str)) && - table[i].m_required) { - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - break; - case TableParameter::BOOL: - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - table[i].m_str)) && - table[i].m_bool) { - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - break; - case TableParameter::INT8_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - // TryGetValue does not suppport reading from int8 - table[i].m_int8Table.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_int8Table[j] = (int8_t)tempTable[j]; - } - break; - } - case TableParameter::INT16_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - // TryGetValue does not suppport reading from int8 - table[i].m_int16Table.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_int16Table[j] = (int16_t)tempTable[j]; - } - break; - 
} - case TableParameter::INT32_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int8 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_int32Table.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_int32Table[j] = tempTable[j]; - } - break; - } - case TableParameter::UINT8_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - // TryGetValue does not suppport reading from int8 - table[i].m_int8Table.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_int8Table[j] = (uint8_t)tempTable[j]; - } - break; - } - case TableParameter::UINT16_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - // TryGetValue does not suppport reading from int8 - table[i].m_uint16Table.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_uint16Table[j] = (uint16_t)tempTable[j]; - } - break; - } - case TableParameter::UINT32_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int8 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_uint32Table.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_uint32Table[j] = tempTable[j]; - } - break; - } - case TableParameter::FLOAT_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int8 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_floatTable.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - ParseDataToFloat(tempTable[j], table[i].m_floatTable[j]); - } - break; - } - case TableParameter::HALF_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int8 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_halfTable.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - uint16_t value = 0; - if (IsHexString(tempTable[j], &value)) { - table[i].m_halfTable[j] = value; - } else { - float val; - ParseDataToFloat(tempTable[j], val); - if (isdenorm(val)) - table[i].m_halfTable[j] = - signbit(val) ? 
Float16NegDenorm : Float16PosDenorm; - else - table[i].m_halfTable[j] = ConvertFloat32ToFloat16(val); - } - } - break; - } - case TableParameter::DOUBLE_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int8 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_doubleTable.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_doubleTable[j] = tempTable[j]; - } - break; - } - case TableParameter::BOOL_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int8 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_boolTable.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_boolTable[j] = tempTable[j]; - } - break; - } - case TableParameter::STRING_TABLE: { - WEX::TestExecution::TestDataArray tempTable; - if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, - tempTable)) && - table[i].m_required) { - // TryGetValue does not suppport reading from int8 - LogErrorFmt(L"Failed to get %s", table[i].m_name); - return E_FAIL; - } - table[i].m_StringTable.resize(tempTable.GetSize()); - for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { - table[i].m_StringTable[j] = tempTable[j]; - } - break; - } - default: - DXASSERT_NOMSG("Invalid Parameter Type"); - } - if (errno == ERANGE) { - LogErrorFmt(L"got out of range value for table %s", table[i].m_name); - return E_FAIL; - } - } - return S_OK; -} - static bool CompareOutputWithExpectedValueInt(int output, int ref, int tolerance) { return ((output - ref) <= tolerance) && ((ref - output) <= tolerance); @@ -6972,10 +6181,10 @@ TEST_F(ExecutionTest, UnaryFloatOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -6997,7 +6206,7 @@ TEST_F(ExecutionTest, UnaryFloatOpTest) { size_t count = Validation_Input->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -7035,10 +6244,10 @@ TEST_F(ExecutionTest, BinaryFloatOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7065,7 +6274,7 @@ TEST_F(ExecutionTest, BinaryFloatOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_double; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", // this callback is called when the 
test // is creating the resource to run the test @@ -7125,10 +6334,10 @@ TEST_F(ExecutionTest, TertiaryFloatOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7155,7 +6364,7 @@ TEST_F(ExecutionTest, TertiaryFloatOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_double; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -7198,10 +6407,10 @@ TEST_F(ExecutionTest, UnaryHalfOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -7232,7 +6441,7 @@ TEST_F(ExecutionTest, UnaryHalfOpTest) { size_t count = Validation_Input->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "UnaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -7273,10 +6482,10 @@ TEST_F(ExecutionTest, BinaryHalfOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -7312,7 +6521,7 @@ TEST_F(ExecutionTest, BinaryHalfOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_double; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -7384,10 +6593,10 @@ TEST_F(ExecutionTest, TertiaryHalfOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -7422,7 +6631,7 @@ TEST_F(ExecutionTest, TertiaryHalfOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_double; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -7470,10 +6679,10 @@ TEST_F(ExecutionTest, UnaryIntOpTest) { 
WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7492,7 +6701,7 @@ TEST_F(ExecutionTest, UnaryIntOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", // this callback is called when the test // is creating the resource to run the test @@ -7530,10 +6739,10 @@ TEST_F(ExecutionTest, UnaryUintOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7552,7 +6761,7 @@ TEST_F(ExecutionTest, UnaryUintOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", // this callback is called when the test // is creating the resource to run the test @@ -7590,10 +6799,10 @@ TEST_F(ExecutionTest, BinaryIntOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7617,7 +6826,7 @@ TEST_F(ExecutionTest, BinaryIntOpTest) { size_t numExpected = Validation_Expected2->size() == 0 ? 
1 : 2; - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", // this callback is called when the test // is creating the resource to run the test @@ -7680,10 +6889,10 @@ TEST_F(ExecutionTest, TertiaryIntOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7705,7 +6914,7 @@ TEST_F(ExecutionTest, TertiaryIntOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", // this callback is called when the test // is creating the resource to run the test @@ -7750,10 +6959,10 @@ TEST_F(ExecutionTest, BinaryUintOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7775,7 +6984,7 @@ TEST_F(ExecutionTest, BinaryUintOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input1->size(); int numExpected = Validation_Expected2->size() == 0 ? 1 : 2; - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", // this callback is called when the test // is creating the resource to run the test @@ -7842,10 +7051,10 @@ TEST_F(ExecutionTest, TertiaryUintOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } // Read data from the table @@ -7867,7 +7076,7 @@ TEST_F(ExecutionTest, TertiaryUintOpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", // this callback is called when the test // is creating the resource to run the test @@ -7916,10 +7125,10 @@ TEST_F(ExecutionTest, UnaryInt16OpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -7946,7 +7155,7 @@ TEST_F(ExecutionTest, UnaryInt16OpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = 
st::RunShaderOpTest( pDevice, m_support, pStream, "UnaryIntOp", // this callback is called when the test // is creating the resource to run the test @@ -7984,10 +7193,10 @@ TEST_F(ExecutionTest, UnaryUint16OpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -8014,7 +7223,7 @@ TEST_F(ExecutionTest, UnaryUint16OpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "UnaryUintOp", // this callback is called when the test // is creating the resource to run the test @@ -8053,10 +7262,10 @@ TEST_F(ExecutionTest, BinaryInt16OpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -8089,7 +7298,7 @@ TEST_F(ExecutionTest, BinaryInt16OpTest) { size_t numExpected = Validation_Expected2->size() == 0 ? 1 : 2; - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryIntOp", // this callback is called when the test // is creating the resource to run the test @@ -8151,10 +7360,10 @@ TEST_F(ExecutionTest, TertiaryInt16OpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -8185,7 +7394,7 @@ TEST_F(ExecutionTest, TertiaryInt16OpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "TertiaryIntOp", // this callback is called when the test // is creating the resource to run the test @@ -8228,10 +7437,10 @@ TEST_F(ExecutionTest, BinaryUint16OpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -8262,7 +7471,7 @@ TEST_F(ExecutionTest, BinaryUint16OpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input1->size(); int numExpected = Validation_Expected2->size() == 0 ? 
1 : 2; - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryUintOp", // this callback is called when the test // is creating the resource to run the test @@ -8326,10 +7535,10 @@ TEST_F(ExecutionTest, TertiaryUint16OpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -8361,7 +7570,7 @@ TEST_F(ExecutionTest, TertiaryUint16OpTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_int32; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "TertiaryUintOp", // this callback is called when the test // is creating the resource to run the test @@ -8916,10 +8125,10 @@ TEST_F(ExecutionTest, DotTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } @@ -8946,7 +8155,7 @@ TEST_F(ExecutionTest, DotTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_double; size_t count = Validation_Input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "DotOp", // this callback is called when the test // is creating the resource to run the test @@ -9000,10 +8209,10 @@ TEST_F(ExecutionTest, Dot2AddHalfTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { return; } @@ -9036,7 +8245,7 @@ TEST_F(ExecutionTest, Dot2AddHalfTest) { handler.GetTableParamByName(L"Validation.Tolerance")->m_double; size_t count = validation_input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "Dot2AddHalfOp", // this callback is called when the test // is creating the resource to run the test @@ -9088,10 +8297,10 @@ TEST_F(ExecutionTest, Dot4AddI8PackedTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { return; } @@ -9112,7 +8321,7 @@ TEST_F(ExecutionTest, Dot4AddI8PackedTest) { size_t count = validation_input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, 
"Dot4AddI8PackedOp", // this callback is called when the test // is creating the resource to run the test @@ -9151,10 +8360,10 @@ TEST_F(ExecutionTest, Dot4AddU8PackedTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_4, false)) { return; } @@ -9175,7 +8384,7 @@ TEST_F(ExecutionTest, Dot4AddU8PackedTest) { size_t count = validation_input1->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "Dot4AddU8PackedOp", // this callback is called when the test // is creating the resource to run the test @@ -9214,10 +8423,10 @@ TEST_F(ExecutionTest, Msad4Test) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } size_t tableSize = sizeof(Msad4OpParameters) / sizeof(TableParameter); @@ -9238,7 +8447,7 @@ TEST_F(ExecutionTest, Msad4Test) { size_t count = Validation_Expected->size(); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "Msad4", // this callback is called when the test // is creating the resource to run the test @@ -9296,10 +8505,10 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -9340,7 +8549,7 @@ TEST_F(ExecutionTest, DenormBinaryFloatOpTest) { "must have same number of expected values"); } - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "BinaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -9407,10 +8616,10 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL::D3D_SHADER_MODEL_6_2)) { return; } @@ -9453,7 +8662,7 @@ TEST_F(ExecutionTest, DenormTertiaryFloatOpTest) { "must have same number of expected values"); } - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "TertiaryFPOp", // this callback is called when the test // is creating the resource to run the test @@ -9846,10 +9055,10 @@ void ExecutionTest::WaveIntrinsicsActivePrefixTest( static const unsigned int 
DispatchGroupCount = 1; static const unsigned int ThreadCount = ThreadsPerGroup * DispatchGroupCount; CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } if (!DoesDeviceSupportWaveOps(pDevice)) { @@ -9881,31 +9090,33 @@ void ExecutionTest::WaveIntrinsicsActivePrefixTest( for (size_t maskIndex = 0; maskIndex < sizeof(MaskFunctionTable) / sizeof(MaskFunction); ++maskIndex) { - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "WaveIntrinsicsOp", - // this callback is called when the test - // is creating the resource to run the test - [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { - VERIFY_IS_TRUE(0 == _stricmp(Name, "SWaveIntrinsicsOp")); - size_t size = sizeof(PerThreadData) * ThreadCount; - Data.resize(size); - PerThreadData *pPrimitives = (PerThreadData *)Data.data(); - // 4 different inputs for each operation test - size_t index = 0; - std::vector *IntList = InputDataList[setIndex]; - while (index < ThreadCount) { - PerThreadData *p = &pPrimitives[index]; - p->firstLaneId = 0xFFFFBFFF; - p->laneIndex = 0xFFFFBFFF; - p->mask = MaskFunctionTable[maskIndex]((int)index); - p->input = (*IntList)[index % IntList->size()]; - p->output = 0xFFFFBFFF; - index++; - } - // use shader from data table - pShaderOp->Shaders.at(0).Text = Text.m_psz; - }, - ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse( + pDevice, m_support, "WaveIntrinsicsOp", + // this callback is called when the test + // is creating the resource to run the test + [&](LPCSTR Name, std::vector &Data, + st::ShaderOp *pShaderOp) { + VERIFY_IS_TRUE(0 == _stricmp(Name, "SWaveIntrinsicsOp")); + size_t size = sizeof(PerThreadData) * ThreadCount; + Data.resize(size); + PerThreadData *pPrimitives = (PerThreadData *)Data.data(); + // 4 different inputs for each operation test + size_t index = 0; + std::vector *IntList = InputDataList[setIndex]; + while (index < ThreadCount) { + PerThreadData *p = &pPrimitives[index]; + p->firstLaneId = 0xFFFFBFFF; + p->laneIndex = 0xFFFFBFFF; + p->mask = MaskFunctionTable[maskIndex]((int)index); + p->input = (*IntList)[index % IntList->size()]; + p->output = 0xFFFFBFFF; + index++; + } + // use shader from data table + pShaderOp->Shaders.at(0).Text = Text.m_psz; + }, + ShaderOpSet); // Check the value MappedData data; @@ -10106,11 +9317,11 @@ void ExecutionTest::WaveIntrinsicsMultiPrefixOpTest( constexpr size_t ThreadCount = ThreadsPerGroup * DispatchGroupSize; CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_5)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_5)) { return; } @@ -10134,30 +9345,31 @@ void ExecutionTest::WaveIntrinsicsMultiPrefixOpTest( for (size_t maskIndex = 0; maskIndex < _countof(MaskFunctionTable); ++maskIndex) { - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "WaveIntrinsicsOp", - [&](LPCSTR name, std::vector &data, st::ShaderOp *pShaderOp) { - UNREFERENCED_PARAMETER(name); - - const size_t dataSize = sizeof(PerThreadData) * ThreadCount; - - data.resize(dataSize); - PerThreadData *pThreadData = - reinterpret_cast(data.data()); - - for (size_t i = 0; i != ThreadCount; ++i) { - pThreadData[i].key = keys->at(i % keys->size()); - pThreadData[i].value = 
values->at(i % values->size()); - pThreadData[i].firstLaneId = 0xdeadbeef; - pThreadData[i].laneId = 0xdeadbeef; - pThreadData[i].mask = MaskFunctionTable[maskIndex]((int)i); - pThreadData[i].result = 0xdeadbeef; - } + std::shared_ptr test = + st::RunShaderOpTestAfterParse( + pDevice, m_support, "WaveIntrinsicsOp", + [&](LPCSTR name, std::vector &data, st::ShaderOp *pShaderOp) { + UNREFERENCED_PARAMETER(name); + + const size_t dataSize = sizeof(PerThreadData) * ThreadCount; + + data.resize(dataSize); + PerThreadData *pThreadData = + reinterpret_cast(data.data()); + + for (size_t i = 0; i != ThreadCount; ++i) { + pThreadData[i].key = keys->at(i % keys->size()); + pThreadData[i].value = values->at(i % values->size()); + pThreadData[i].firstLaneId = 0xdeadbeef; + pThreadData[i].laneId = 0xdeadbeef; + pThreadData[i].mask = MaskFunctionTable[maskIndex]((int)i); + pThreadData[i].result = 0xdeadbeef; + } - pShaderOp->Shaders.at(0).Text = shaderSource; - pShaderOp->Shaders.at(0).Target = shaderProfile; - }, - ShaderOpSet); + pShaderOp->Shaders.at(0).Text = shaderSource; + pShaderOp->Shaders.at(0).Target = shaderProfile; + }, + ShaderOpSet); MappedData mappedData; test->Test->GetReadBackData("SWaveIntrinsicsOp", &mappedData); @@ -10234,11 +9446,11 @@ TEST_F(ExecutionTest, CBufferTestHalf) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); // Single operation test at the moment. CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_2)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_2)) return; if (!DoesDeviceSupportNative16bitOps(pDevice)) { @@ -10250,7 +9462,7 @@ TEST_F(ExecutionTest, CBufferTestHalf) { uint16_t InputData[] = {0x3F80, 0x3F00, 0x3D80, 0x7BFF}; - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "CBufferTestHalf", [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { UNREFERENCED_PARAMETER(pShaderOp); @@ -10280,7 +9492,7 @@ TEST_F(ExecutionTest, CBufferTestHalf) { } void TestBarycentricVariant(bool checkOrdering, - std::shared_ptr test) { + std::shared_ptr test) { MappedData data; D3D12_RESOURCE_DESC &D = test->ShaderOp->GetResourceByName("RTarget")->Desc; UINT width = (UINT)D.Width; @@ -10364,10 +9576,10 @@ TEST_F(ExecutionTest, BarycentricsTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_1)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_1)) return; if (!DoesDeviceSupportBarycentrics(pDevice)) { @@ -10386,9 +9598,9 @@ TEST_F(ExecutionTest, BarycentricsTest) { auto ResourceCallbackFnNoShift = MakeBarycentricsResourceInitCallbackFn(test_iteration); - std::shared_ptr test = - RunShaderOpTestAfterParse(pDevice, m_support, "Barycentrics", - ResourceCallbackFnNoShift, ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, "Barycentrics", + ResourceCallbackFnNoShift, ShaderOpSet); TestBarycentricVariant(false, test); // Now test that barycentric ordering is consistent @@ -10400,8 +9612,9 @@ TEST_F(ExecutionTest, BarycentricsTest) { auto ResourceCallbackFn = 
MakeBarycentricsResourceInitCallbackFn(test_iteration); - std::shared_ptr test2 = RunShaderOpTestAfterParse( - pDevice, m_support, "Barycentrics", ResourceCallbackFn, ShaderOpSet); + std::shared_ptr test2 = + st::RunShaderOpTestAfterParse(pDevice, m_support, "Barycentrics", + ResourceCallbackFn, ShaderOpSet); TestBarycentricVariant(true, test2); } } @@ -10647,7 +9860,7 @@ bool ExecutionTest::SetupRawBufferLdStTest(D3D_SHADER_MODEL shaderModel, CComPtr &pStream, const char *&sTy, const char *&additionalOptions) { - if (!CreateDevice(&pDevice, shaderModel)) { + if (!createDevice(&pDevice, shaderModel)) { return false; } @@ -10692,7 +9905,7 @@ bool ExecutionTest::SetupRawBufferLdStTest(D3D_SHADER_MODEL shaderModel, } // read shader config - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); return true; } @@ -10784,7 +9997,7 @@ void ExecutionTest::RunComputeRawBufferLdStTest( (int)sizeof(Ty), additionalOptions) != -1); // run the shader - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, shaderOpName, [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(((0 == strncmp(Name, "SRVBuffer", 9)) || @@ -10839,7 +10052,7 @@ void ExecutionTest::RunGraphicsRawBufferLdStTest( (int)sizeof(Ty), additionalOptions) != -1); // run the shader - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, shaderOpName, [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE(((0 == strncmp(Name, "SRVBuffer", 9)) || @@ -10921,7 +10134,7 @@ TEST_F(ExecutionTest, PackUnpackTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; @@ -10929,14 +10142,14 @@ TEST_F(ExecutionTest, PackUnpackTest) { string args = "-enable-16bit-types -DPACKUNPACK_PLACEHOLDER"; string target = "cs_6_2"; - if (!CreateDevice(&pDevice)) { + if (!createDevice(&pDevice)) { return; } #else string args = "-enable-16bit-types"; string target = "cs_6_6"; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) { + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) { return; } #endif @@ -10962,7 +10175,7 @@ TEST_F(ExecutionTest, PackUnpackTest) { std::vector expectedPacked(count / 4); std::vector expectedUnpacked(count / 4); - std::shared_ptr test = RunShaderOpTest( + std::shared_ptr test = st::RunShaderOpTest( pDevice, m_support, pStream, "PackUnpackOp", // this callback is called when the test // is creating the resource to run the test @@ -11316,7 +10529,7 @@ TEST_F(ExecutionTest, SignatureResourcesTest) { "}\n"; CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; RunResourceTest(pDevice, pShader.c_str(), L"cs_6_6", /*isDynamic*/ false); @@ -11355,7 +10568,7 @@ TEST_F(ExecutionTest, DynamicResourcesTest) { "}\n"; CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; // ResourceDescriptorHeap/SamplerDescriptorHeap requires Resource Binding Tier @@ -11398,7 +10611,7 @@ TEST_F(ExecutionTest, DynamicResourcesDynamicIndexingTest) { WEX::TestExecution::SetVerifyOutput verifySettings( 
WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); std::shared_ptr ShaderOpSet = std::make_shared(); @@ -11436,7 +10649,7 @@ TEST_F(ExecutionTest, DynamicResourcesDynamicIndexingTest) { ((UINT)sm & 0x0f)); CComPtr pDevice; - if (!CreateDevice(&pDevice, sm, false /* skipUnsupported */)) { + if (!createDevice(&pDevice, sm, false /* skipUnsupported */)) { continue; } D3D12_FEATURE_DATA_D3D12_OPTIONS devOptions; @@ -11495,9 +10708,10 @@ TEST_F(ExecutionTest, DynamicResourcesDynamicIndexingTest) { // Test Compute shader { pShaderOp->CS = pShaderOp->GetString("CS66"); - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "DynamicResourcesDynamicIndexing", nullptr, - ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, + "DynamicResourcesDynamicIndexing", + nullptr, ShaderOpSet); MappedData resultData; test->Test->GetReadBackData("g_result", &resultData); @@ -11512,9 +10726,10 @@ TEST_F(ExecutionTest, DynamicResourcesDynamicIndexingTest) { pShaderOp->CS = nullptr; pShaderOp->VS = pShaderOp->GetString("VS66"); pShaderOp->PS = pShaderOp->GetString("PS66"); - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "DynamicResourcesDynamicIndexing", nullptr, - ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, + "DynamicResourcesDynamicIndexing", + nullptr, ShaderOpSet); MappedData resultVSData; MappedData resultPSData; @@ -11577,19 +10792,20 @@ void RunWaveSizeTest(UINT minWaveSize, UINT maxWaveSize, waveSize) != -1); // run the shader - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "WaveSizeTest", - [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { - VERIFY_IS_TRUE((0 == strncmp(Name, "UAVBuffer0", 10))); - pShaderOp->Shaders.at(0).Arguments = compilerOptions; - pShaderOp->Shaders.at(0).Text = waveSizeTestShader; - - VERIFY_IS_TRUE(sizeof(WaveSizeTestData) * MAX_WAVESIZE <= - Data.size()); - WaveSizeTestData *pInData = (WaveSizeTestData *)Data.data(); - memset(pInData, 0, sizeof(WaveSizeTestData) * MAX_WAVESIZE); - }, - ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse( + pDevice, m_support, "WaveSizeTest", + [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { + VERIFY_IS_TRUE((0 == strncmp(Name, "UAVBuffer0", 10))); + pShaderOp->Shaders.at(0).Arguments = compilerOptions; + pShaderOp->Shaders.at(0).Text = waveSizeTestShader; + + VERIFY_IS_TRUE(sizeof(WaveSizeTestData) * MAX_WAVESIZE <= + Data.size()); + WaveSizeTestData *pInData = (WaveSizeTestData *)Data.data(); + memset(pInData, 0, sizeof(WaveSizeTestData) * MAX_WAVESIZE); + }, + ShaderOpSet); // verify expected values MappedData dataUav; @@ -11665,7 +10881,7 @@ void ExecuteWaveSizeRangeInstance(UINT minWaveSize, UINT maxWaveSize, }; // run the shader - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "WaveSizeTest", [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { VERIFY_IS_TRUE((0 == strncmp(Name, "UAVBuffer0", 10))); @@ -11737,7 +10953,7 @@ void ExecutionTest::WaveSizeTest() { WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6, + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6, /*skipUnsupported*/ false)) { return; } @@ -11765,7 +10981,7 @@ 
void ExecutionTest::WaveSizeTest() { CComPtr pStream; std::shared_ptr ShaderOpSet = std::make_shared(); - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); LogCommentFmt(L"Testing WaveSize attribute for shader model 6.6."); @@ -11777,7 +10993,7 @@ void ExecutionTest::WaveSizeRangeTest() { WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_8, + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_8, /*skipUnsupported*/ false)) { return; } @@ -11805,7 +11021,7 @@ void ExecutionTest::WaveSizeRangeTest() { CComPtr pStream; std::shared_ptr ShaderOpSet = std::make_shared(); - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); LogCommentFmt(L"Testing WaveSize Range attribute for shader model 6.8."); @@ -12034,7 +11250,7 @@ void VerifyAtomicResults(const BYTE *uResults, const BYTE *sResults, } } -void VerifyAtomicsRawTest(std::shared_ptr test, +void VerifyAtomicsRawTest(std::shared_ptr test, uint64_t maxIdx, size_t bitSize) { size_t stride = 8; @@ -12083,7 +11299,7 @@ void VerifyAtomicsRawTest(std::shared_ptr test, bitSize); } -void VerifyAtomicsTypedTest(std::shared_ptr test, +void VerifyAtomicsTypedTest(std::shared_ptr test, uint64_t maxIdx, size_t bitSize) { size_t stride = 8; @@ -12135,7 +11351,7 @@ void VerifyAtomicsTypedTest(std::shared_ptr test, VerifyAtomicResults(pUint, pSint + stride, pXchg, stride, maxIdx, bitSize); } -void VerifyAtomicsSharedTest(std::shared_ptr test, +void VerifyAtomicsSharedTest(std::shared_ptr test, uint64_t maxIdx, size_t bitSize) { size_t stride = 8; @@ -12156,7 +11372,7 @@ void VerifyAtomicsSharedTest(std::shared_ptr test, bitSize); } -void VerifyAtomicsTest(std::shared_ptr test, +void VerifyAtomicsTest(std::shared_ptr test, uint64_t maxIdx, size_t bitSize) { VerifyAtomicsRawTest(test, maxIdx, bitSize); VerifyAtomicsTypedTest(test, maxIdx, bitSize); @@ -12166,10 +11382,10 @@ TEST_F(ExecutionTest, AtomicsTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; std::shared_ptr ShaderOpSet = @@ -12181,7 +11397,7 @@ TEST_F(ExecutionTest, AtomicsTest) { // Test compute shader LogCommentFmt( L"Verifying 32-bit integer atomic operations in compute shader"); - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); VerifyAtomicsTest(test, 32 * 32, 32); @@ -12192,8 +11408,8 @@ TEST_F(ExecutionTest, AtomicsTest) { if (DoesDeviceSupportMeshShaders(pDevice)) { LogCommentFmt(L"Verifying 32-bit integer atomic operations in " L"amp/mesh/pixel shaders"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", + nullptr, ShaderOpSet); VerifyAtomicsTest(test, 8 * 8 * 2 + 8 * 8 * 2 + 64 * 64, 32); VerifyAtomicsSharedTest(test, 8 * 8 * 2 + 8 * 8 * 2, 32); } @@ -12202,8 +11418,8 @@ TEST_F(ExecutionTest, AtomicsTest) { 
pShaderOp->MS = nullptr; LogCommentFmt( L"Verifying 32-bit integer atomic operations in vert/pixel shaders"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", + nullptr, ShaderOpSet); VerifyAtomicsTest(test, 64 * 64 + 6, 32); } @@ -12211,10 +11427,10 @@ TEST_F(ExecutionTest, Atomics64Test) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; if (!DoesDeviceSupportInt64(pDevice)) { @@ -12240,7 +11456,7 @@ TEST_F(ExecutionTest, Atomics64Test) { // Test compute shader LogCommentFmt(L"Verifying 64-bit integer atomic operations on raw buffers in " L"compute shader"); - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "AtomicsRoot", nullptr, ShaderOpSet); VerifyAtomicsRawTest(test, 32 * 32, 64); @@ -12249,8 +11465,8 @@ TEST_F(ExecutionTest, Atomics64Test) { if (DoesDeviceSupportMeshShaders(pDevice)) { LogCommentFmt(L"Verifying 64-bit integer atomic operations on raw buffers " L"in amp/mesh/pixel shader"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", + nullptr, ShaderOpSet); VerifyAtomicsRawTest(test, 8 * 8 * 2 + 8 * 8 * 2 + 64 * 64, 64); } @@ -12258,8 +11474,8 @@ TEST_F(ExecutionTest, Atomics64Test) { pShaderOp->MS = nullptr; LogCommentFmt(L"Verifying 64-bit integer atomic operations on raw buffers in " L"vert/pixel shader"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", + nullptr, ShaderOpSet); VerifyAtomicsRawTest(test, 64 * 64 + 6, 64); } @@ -12267,10 +11483,10 @@ TEST_F(ExecutionTest, AtomicsRawHeap64Test) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; if (!DoesDeviceSupportInt64(pDevice)) { @@ -12303,7 +11519,7 @@ TEST_F(ExecutionTest, AtomicsRawHeap64Test) { // Test compute shader LogCommentFmt(L"Verifying 64-bit integer atomic operations on heap raw " L"buffers in compute shader"); - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); VerifyAtomicsRawTest(test, 32 * 32, 64); @@ -12312,8 +11528,8 @@ TEST_F(ExecutionTest, AtomicsRawHeap64Test) { if (DoesDeviceSupportMeshShaders(pDevice)) { LogCommentFmt(L"Verifying 64-bit integer atomic operations on heap raw " L"buffers in amp/mesh/pixel shader"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", + nullptr, ShaderOpSet); VerifyAtomicsRawTest(test, 8 * 8 * 2 + 8 * 8 * 2 + 64 * 64, 64); } @@ -12321,8 
+11537,8 @@ TEST_F(ExecutionTest, AtomicsRawHeap64Test) { pShaderOp->MS = nullptr; LogCommentFmt(L"Verifying 64-bit integer atomic operations on heap raw " L"buffers in vert/pixel shader"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", + nullptr, ShaderOpSet); VerifyAtomicsRawTest(test, 64 * 64 + 6, 64); } @@ -12330,10 +11546,10 @@ TEST_F(ExecutionTest, AtomicsTyped64Test) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; if (!DoesDeviceSupportInt64(pDevice)) { @@ -12366,7 +11582,7 @@ TEST_F(ExecutionTest, AtomicsTyped64Test) { // Test compute shader LogCommentFmt(L"Verifying 64-bit integer atomic operations on typed " L"resources in compute shader"); - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "AtomicsHeap", nullptr, ShaderOpSet); VerifyAtomicsTypedTest(test, 32 * 32, 64); @@ -12375,8 +11591,8 @@ TEST_F(ExecutionTest, AtomicsTyped64Test) { if (DoesDeviceSupportMeshShaders(pDevice)) { LogCommentFmt(L"Verifying 64-bit integer atomic operations on typed " L"resources in amp/mesh/pixel shader"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", + nullptr, ShaderOpSet); VerifyAtomicsTypedTest(test, 8 * 8 * 2 + 8 * 8 * 2 + 64 * 64, 64); } @@ -12384,8 +11600,8 @@ TEST_F(ExecutionTest, AtomicsTyped64Test) { pShaderOp->MS = nullptr; LogCommentFmt(L"Verifying 64-bit integer atomic operations on typed " L"resources in vert/pixel shader"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsHeap", + nullptr, ShaderOpSet); VerifyAtomicsTypedTest(test, 64 * 64 + 6, 64); } @@ -12393,10 +11609,10 @@ TEST_F(ExecutionTest, AtomicsShared64Test) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice, D3D_SHADER_MODEL_6_6)) + if (!createDevice(&pDevice, D3D_SHADER_MODEL_6_6)) return; if (!DoesDeviceSupportInt64(pDevice)) { @@ -12426,7 +11642,7 @@ TEST_F(ExecutionTest, AtomicsShared64Test) { LogCommentFmt(L"Verifying 64-bit integer atomic operations on groupshared " L"variables in compute shader"); - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "AtomicsRoot", nullptr, ShaderOpSet); VerifyAtomicsSharedTest(test, 32 * 32, 64); @@ -12435,8 +11651,8 @@ TEST_F(ExecutionTest, AtomicsShared64Test) { if (DoesDeviceSupportMeshShaders(pDevice)) { LogCommentFmt(L"Verifying 64-bit integer atomic operations on groupshared " L"variables in amp/mesh/pixel shader"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "AtomicsRoot", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, 
"AtomicsRoot", + nullptr, ShaderOpSet); VerifyAtomicsSharedTest(test, 8 * 8 * 2 + 8 * 8 * 2, 64); } } @@ -12464,7 +11680,8 @@ void VerifyAtomicFloatResults(const float *results) { } } -void VerifyAtomicsFloatSharedTest(std::shared_ptr test) { +void VerifyAtomicsFloatSharedTest( + std::shared_ptr test) { MappedData Data; const float *pData = nullptr; @@ -12476,7 +11693,7 @@ void VerifyAtomicsFloatSharedTest(std::shared_ptr test) { VerifyAtomicFloatResults(pData); } -void VerifyAtomicsFloatTest(std::shared_ptr test) { +void VerifyAtomicsFloatTest(std::shared_ptr test) { // struct mirroring that in the shader struct AtomicStuff { @@ -12524,10 +11741,10 @@ TEST_F(ExecutionTest, AtomicsFloatTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); CComPtr pDevice; - if (!CreateDevice(&pDevice)) + if (!createDevice(&pDevice)) return; std::shared_ptr ShaderOpSet = @@ -12539,7 +11756,7 @@ TEST_F(ExecutionTest, AtomicsFloatTest) { // Test compute shader LogCommentFmt( L"Verifying float cmp/xchg atomic operations in compute shader"); - std::shared_ptr test = RunShaderOpTestAfterParse( + std::shared_ptr test = st::RunShaderOpTestAfterParse( pDevice, m_support, "FloatAtomics", nullptr, ShaderOpSet); VerifyAtomicsFloatTest(test); VerifyAtomicsFloatSharedTest(test); @@ -12549,8 +11766,8 @@ TEST_F(ExecutionTest, AtomicsFloatTest) { if (DoesDeviceSupportMeshShaders(pDevice)) { LogCommentFmt(L"Verifying float cmp/xchg atomic operations in " L"amp/mesh/pixel shaders"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "FloatAtomics", - nullptr, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "FloatAtomics", + nullptr, ShaderOpSet); VerifyAtomicsFloatTest(test); VerifyAtomicsFloatSharedTest(test); } @@ -12559,8 +11776,8 @@ TEST_F(ExecutionTest, AtomicsFloatTest) { pShaderOp->MS = nullptr; LogCommentFmt( L"Verifying float cmp/xchg atomic operations in vert/pixel shaders"); - test = RunShaderOpTestAfterParse(pDevice, m_support, "FloatAtomics", nullptr, - ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "FloatAtomics", + nullptr, ShaderOpSet); VerifyAtomicsFloatTest(test); } @@ -12589,7 +11806,7 @@ TEST_F(ExecutionTest, HelperLaneTest) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); std::shared_ptr ShaderOpSet = std::make_shared(); @@ -12604,19 +11821,20 @@ TEST_F(ExecutionTest, HelperLaneTest) { ((UINT)sm & 0x0f)); CComPtr pDevice; - if (!CreateDevice(&pDevice, sm, false /* skipUnsupported */)) + if (!createDevice(&pDevice, sm, false /* skipUnsupported */)) continue; - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "HelperLaneTestNoWave", - // this callback is called when the test is creating the resource to - // run the test - [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { - VERIFY_IS_TRUE(0 == _stricmp(Name, "UAVBuffer0")); - std::fill(Data.begin(), Data.end(), (BYTE)0xCC); - UNREFERENCED_PARAMETER(pShaderOp); - }, - ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse( + pDevice, m_support, "HelperLaneTestNoWave", + // this callback is called when the test is creating the 
resource to + // run the test + [&](LPCSTR Name, std::vector &Data, st::ShaderOp *pShaderOp) { + VERIFY_IS_TRUE(0 == _stricmp(Name, "UAVBuffer0")); + std::fill(Data.begin(), Data.end(), (BYTE)0xCC); + UNREFERENCED_PARAMETER(pShaderOp); + }, + ShaderOpSet); struct HelperLaneTestResult { int32_t is_helper_00; @@ -12989,7 +12207,7 @@ TEST_F(ExecutionTest, HelperLaneTestWave) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); std::shared_ptr ShaderOpSet = std::make_shared(); @@ -13010,7 +12228,7 @@ TEST_F(ExecutionTest, HelperLaneTestWave) { bool smPassed = true; CComPtr pDevice; - if (!CreateDevice(&pDevice, sm, false /* skipUnsupported */)) { + if (!createDevice(&pDevice, sm, false /* skipUnsupported */)) { continue; } @@ -13045,9 +12263,10 @@ TEST_F(ExecutionTest, HelperLaneTestWave) { // Test Compute shader { - std::shared_ptr test = - RunShaderOpTestAfterParse(pDevice, m_support, "HelperLaneTestWave", - CleanUAVBuffer0Buffer, ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, + "HelperLaneTestWave", + CleanUAVBuffer0Buffer, ShaderOpSet); MappedData uavData; test->Test->GetReadBackData("UAVBuffer0", &uavData); @@ -13069,9 +12288,10 @@ TEST_F(ExecutionTest, HelperLaneTestWave) { // Test Vertex + Pixel shader { pShaderOp->CS = nullptr; - std::shared_ptr test = - RunShaderOpTestAfterParse(pDevice, m_support, "HelperLaneTestWave", - CleanUAVBuffer0Buffer, ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, + "HelperLaneTestWave", + CleanUAVBuffer0Buffer, ShaderOpSet); MappedData uavData; test->Test->GetReadBackData("UAVBuffer0", &uavData); @@ -13130,7 +12350,7 @@ TEST_F(ExecutionTest, QuadAnyAll) { WEX::TestExecution::SetVerifyOutput verifySettings( WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); std::shared_ptr ShaderOpSet = std::make_shared(); @@ -13163,7 +12383,7 @@ TEST_F(ExecutionTest, QuadAnyAll) { } CComPtr pDevice; - if (!CreateDevice(&pDevice, sm, false /* skipUnsupported */)) { + if (!createDevice(&pDevice, sm, false /* skipUnsupported */)) { continue; } @@ -13176,8 +12396,9 @@ TEST_F(ExecutionTest, QuadAnyAll) { Skipped = false; // test compute - std::shared_ptr test = RunShaderOpTestAfterParse( - pDevice, m_support, "QuadAnyAll", CleanUAVBuffer0Buffer, ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, "QuadAnyAll", + CleanUAVBuffer0Buffer, ShaderOpSet); MappedData uavData; test->Test->GetReadBackData("UAVBuffer0", &uavData); @@ -13189,8 +12410,8 @@ TEST_F(ExecutionTest, QuadAnyAll) { pShaderOp->CS = nullptr; // test AS/MS - test = RunShaderOpTestAfterParse(pDevice, m_support, "QuadAnyAll", - CleanUAVBuffer0Buffer, ShaderOpSet); + test = st::RunShaderOpTestAfterParse(pDevice, m_support, "QuadAnyAll", + CleanUAVBuffer0Buffer, ShaderOpSet); test->Test->GetReadBackData("UAVBuffer0", &uavData); Result = VerifyQuadAnyAllResults((int2 *)uavData.data()); @@ -13337,7 +12558,7 @@ TEST_F(ExecutionTest, IsNormalTest) { WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); CComPtr pDevice; - VERIFY_IS_TRUE(CreateDevice(&pDevice, D3D_SHADER_MODEL_6_0, + VERIFY_IS_TRUE(createDevice(&pDevice, 
D3D_SHADER_MODEL_6_0, false /* skipUnsupported */)); // The input is -Zero, Zero, -Denormal, Denormal, -Infinity, Infinity, -NaN, @@ -13354,7 +12575,7 @@ TEST_F(ExecutionTest, IsNormalTest) { std::vector *Validation_Expected = &Validation_Expected_Vec; CComPtr pStream; - ReadHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream); + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &pStream, m_support); std::shared_ptr ShaderOpSet = std::make_shared(); @@ -13395,9 +12616,10 @@ TEST_F(ExecutionTest, IsNormalTest) { // Test Compute shader { pShaderOp->CS = pShaderOp->GetString("CS60"); - std::shared_ptr test = - RunShaderOpTestAfterParse(pDevice, m_support, "IsNormal", - ResourceInitFn, ShaderInitFn, ShaderOpSet); + std::shared_ptr test = + st::RunShaderOpTestAfterParse(pDevice, m_support, "IsNormal", + ResourceInitFn, ShaderInitFn, + ShaderOpSet); MappedData data; test->Test->GetReadBackData("g_TestData", &data); diff --git a/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h new file mode 100644 index 0000000000..3822ef02ad --- /dev/null +++ b/tools/clang/unittests/HLSLExec/HlslExecTestUtils.h @@ -0,0 +1,405 @@ +#ifndef HLSLEXECTESTUTILS_H +#define HLSLEXECTESTUTILS_H + +#include "dxc/Support/dxcapi.use.h" +#include "dxc/Test/HlslTestUtils.h" +#include +#include +#include + +namespace ExecTestUtils { +// This is defined in d3d.h for Windows 10 Anniversary Edition SDK, but we +// only require the Windows 10 SDK. +typedef enum D3D_SHADER_MODEL { + D3D_SHADER_MODEL_5_1 = 0x51, + D3D_SHADER_MODEL_6_0 = 0x60, + D3D_SHADER_MODEL_6_1 = 0x61, + D3D_SHADER_MODEL_6_2 = 0x62, + D3D_SHADER_MODEL_6_3 = 0x63, + D3D_SHADER_MODEL_6_4 = 0x64, + D3D_SHADER_MODEL_6_5 = 0x65, + D3D_SHADER_MODEL_6_6 = 0x66, + D3D_SHADER_MODEL_6_7 = 0x67, + D3D_SHADER_MODEL_6_8 = 0x68, + D3D_SHADER_MODEL_6_9 = 0x69, + D3D_HIGHEST_SHADER_MODEL = D3D_SHADER_MODEL_6_9 +} D3D_SHADER_MODEL; +} // namespace ExecTestUtils + +static bool useDebugIfaces() { return true; } + +static bool useDxbc() { +#ifdef _HLK_CONF + return false; +#else + return hlsl_test::GetTestParamBool(L"DXBC"); +#endif +} + +static bool useWarpByDefualt() { +#ifdef _HLK_CONF + return false; +#else + return true; +#endif +} + +// A more recent Windows SDK than currently required is needed for these. +typedef HRESULT(WINAPI *D3D12EnableExperimentalFeaturesFn)( + UINT NumFeatures, __in_ecount(NumFeatures) const IID *IIDs, + __in_ecount_opt(NumFeatures) void *ConfigurationStructs, + __in_ecount_opt(NumFeatures) UINT *ConfigurationStructSizes); + +static const GUID D3D12ExperimentalShaderModelsID = + {/* 76f5573e-f13a-40f5-b297-81ce9e18933f */ + 0x76f5573e, + 0xf13a, + 0x40f5, + {0xb2, 0x97, 0x81, 0xce, 0x9e, 0x18, 0x93, 0x3f}}; + +// Used to create D3D12SDKConfiguration to enable AgilitySDK programmatically. +typedef HRESULT(WINAPI *D3D12GetInterfaceFn)(REFCLSID Rclsid, REFIID Riid, + void **Debug); + +#ifndef __ID3D12SDKConfiguration_INTERFACE_DEFINED__ + +// Copied from AgilitySDK D3D12.h to programmatically enable when in developer +// mode. 
+#define __ID3D12SDKConfiguration_INTERFACE_DEFINED__ + +EXTERN_C const GUID DECLSPEC_SELECTANY IID_ID3D12SDKConfiguration = { + 0xe9eb5314, + 0x33aa, + 0x42b2, + {0xa7, 0x18, 0xd7, 0x7f, 0x58, 0xb1, 0xf1, 0xc7}}; +EXTERN_C const GUID DECLSPEC_SELECTANY CLSID_D3D12SDKConfiguration = { + 0x7cda6aca, + 0xa03e, + 0x49c8, + {0x94, 0x58, 0x03, 0x34, 0xd2, 0x0e, 0x07, 0xce}}; + +MIDL_INTERFACE("e9eb5314-33aa-42b2-a718-d77f58b1f1c7") +ID3D12SDKConfiguration : public IUnknown { +public: + virtual HRESULT STDMETHODCALLTYPE SetSDKVersion(UINT SDKVersion, + LPCSTR SDKPath) = 0; +}; +#endif /* __ID3D12SDKConfiguration_INTERFACE_DEFINED__ */ + +static std::wstring getModuleName() { + wchar_t ModuleName[MAX_PATH + 1] = {0}; + const DWORD Length = GetModuleFileNameW(NULL, ModuleName, MAX_PATH); + + if (Length == 0 || Length == MAX_PATH) + return std::wstring(); // Error condition + + return std::wstring(ModuleName, Length); +} + +static std::wstring computeSDKFullPath(std::wstring SDKPath) { + std::wstring ModulePath = getModuleName(); + const size_t Pos = ModulePath.rfind('\\'); + + if (Pos == std::wstring::npos) + return SDKPath; + + if (SDKPath.substr(0, 2) != L".\\") + return SDKPath; + + return ModulePath.substr(0, Pos) + SDKPath.substr(1); +} + +static UINT getD3D12SDKVersion(std::wstring SDKPath) { + // Try to automatically get the D3D12SDKVersion from the DLL + UINT SDKVersion = 0; + std::wstring D3DCorePath = computeSDKFullPath(SDKPath); + D3DCorePath.append(L"D3D12Core.dll"); + HMODULE D3DCore = LoadLibraryW(D3DCorePath.c_str()); + if (D3DCore) { + if (UINT *SDKVersionOut = + (UINT *)GetProcAddress(D3DCore, "D3D12SDKVersion")) + SDKVersion = *SDKVersionOut; + FreeModule(D3DCore); + } + return SDKVersion; +} + +static bool createDevice(ID3D12Device **D3DDevice, + ExecTestUtils::D3D_SHADER_MODEL TestModel = + ExecTestUtils::D3D_SHADER_MODEL_6_0, + bool SkipUnsupported = true) { + if (TestModel > ExecTestUtils::D3D_HIGHEST_SHADER_MODEL) { + const UINT Minor = (UINT)TestModel & 0x0f; + hlsl_test::LogCommentFmt(L"Installed SDK does not support " + L"shader model 6.%1u", + Minor); + + if (SkipUnsupported) + WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); + + return false; + } + CComPtr DXGIFactory; + CComPtr D3DDeviceCom; + + *D3DDevice = nullptr; + + VERIFY_SUCCEEDED(CreateDXGIFactory1(IID_PPV_ARGS(&DXGIFactory))); + if (hlsl_test::GetTestParamUseWARP(useWarpByDefualt())) { + CComPtr WarpAdapter; + VERIFY_SUCCEEDED(DXGIFactory->EnumWarpAdapter(IID_PPV_ARGS(&WarpAdapter))); + HRESULT CreateHR = D3D12CreateDevice(WarpAdapter, D3D_FEATURE_LEVEL_11_0, + IID_PPV_ARGS(&D3DDeviceCom)); + if (FAILED(CreateHR)) { + hlsl_test::LogCommentFmt( + L"The available version of WARP does not support d3d12."); + + if (SkipUnsupported) + WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); + + return false; + } + + if (GetModuleHandleW(L"d3d10warp.dll") != NULL) { + WCHAR FullModuleFilePath[MAX_PATH] = L""; + GetModuleFileNameW(GetModuleHandleW(L"d3d10warp.dll"), FullModuleFilePath, + sizeof(FullModuleFilePath)); + WEX::Logging::Log::Comment(WEX::Common::String().Format( + L"WARP driver loaded from: %ls", FullModuleFilePath)); + } + + } else { + CComPtr HardwareAdapter; + WEX::Common::String AdapterValue; + HRESULT HR = WEX::TestExecution::RuntimeParameters::TryGetValue( + L"Adapter", AdapterValue); + if (SUCCEEDED(HR)) + st::GetHardwareAdapter(DXGIFactory, AdapterValue, &HardwareAdapter); + else + WEX::Logging::Log::Comment( + L"Using default hardware adapter with D3D12 support."); + + 
VERIFY_SUCCEEDED(D3D12CreateDevice(HardwareAdapter, D3D_FEATURE_LEVEL_11_0, + IID_PPV_ARGS(&D3DDeviceCom))); + } + // retrieve adapter information + const LUID AdapterID = D3DDeviceCom->GetAdapterLuid(); + CComPtr DXGIAdapter; + DXGIFactory->EnumAdapterByLuid(AdapterID, IID_PPV_ARGS(&DXGIAdapter)); + DXGI_ADAPTER_DESC AdapterDesc; + VERIFY_SUCCEEDED(DXGIAdapter->GetDesc(&AdapterDesc)); + hlsl_test::LogCommentFmt(L"Using Adapter:%s", AdapterDesc.Description); + + if (D3DDeviceCom == nullptr) + return false; + + if (!useDxbc()) { + // Check for DXIL support. + typedef struct D3D12_FEATURE_DATA_SHADER_MODEL { + ExecTestUtils::D3D_SHADER_MODEL HighestShaderModel; + } D3D12_FEATURE_DATA_SHADER_MODEL; + const UINT D3D12_FEATURE_SHADER_MODEL = 7; + D3D12_FEATURE_DATA_SHADER_MODEL SMData; + SMData.HighestShaderModel = TestModel; + if (FAILED(D3DDeviceCom->CheckFeatureSupport( + (D3D12_FEATURE)D3D12_FEATURE_SHADER_MODEL, &SMData, + sizeof(SMData))) || + SMData.HighestShaderModel < TestModel) { + const UINT Minor = (UINT)TestModel & 0x0f; + hlsl_test::LogCommentFmt(L"The selected device does not support " + L"shader model 6.%1u", + Minor); + + if (SkipUnsupported) + WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); + + return false; + } + } + + if (useDebugIfaces()) { + CComPtr InfoQueue; + if (SUCCEEDED(D3DDeviceCom->QueryInterface(&InfoQueue))) + InfoQueue->SetMuteDebugOutput(FALSE); + } + + *D3DDevice = D3DDeviceCom.Detach(); + return true; +} + +inline void readHlslDataIntoNewStream(LPCWSTR RelativePath, IStream **Stream, + dxc::DxcDllSupport &Support) { + VERIFY_SUCCEEDED(Support.Initialize()); + CComPtr Library; + CComPtr Blob; + CComPtr StreamCom; + std::wstring Path = hlsl_test::GetPathToHlslDataFile( + RelativePath, HLSLDATAFILEPARAM, DEFAULT_EXEC_TEST_DIR); + VERIFY_SUCCEEDED(Support.CreateInstance(CLSID_DxcLibrary, &Library)); + VERIFY_SUCCEEDED(Library->CreateBlobFromFile(Path.c_str(), nullptr, &Blob)); + VERIFY_SUCCEEDED(Library->CreateStreamFromBlobReadOnly(Blob, &StreamCom)); + *Stream = StreamCom.Detach(); +} + +static HRESULT enableAgilitySDK(HMODULE Runtime, UINT SDKVersion, + LPCWSTR SDKPath) { + D3D12GetInterfaceFn GetInterfaceFunc = + (D3D12GetInterfaceFn)GetProcAddress(Runtime, "D3D12GetInterface"); + CComPtr D3D12SDKConfiguration; + IFR(GetInterfaceFunc(CLSID_D3D12SDKConfiguration, + IID_PPV_ARGS(&D3D12SDKConfiguration))); + IFR(D3D12SDKConfiguration->SetSDKVersion(SDKVersion, CW2A(SDKPath))); + + // Currently, it appears that the SetSDKVersion will succeed even when + // D3D12Core is not found, or its version doesn't match. When that's the + // case, will cause a failure in the very next thing that actually requires + // D3D12Core.dll to be loaded instead. So, we attempt to clear experimental + // features next, which is a valid use case and a no-op at this point. This + // requires D3D12Core to be loaded. If this fails, we know the AgilitySDK + // setting actually failed. + D3D12EnableExperimentalFeaturesFn ExperimentalFeaturesFunc = + (D3D12EnableExperimentalFeaturesFn)GetProcAddress( + Runtime, "D3D12EnableExperimentalFeatures"); + if (ExperimentalFeaturesFunc == nullptr) + // If this failed, D3D12 must be too old for AgilitySDK. But if that's + // the case, creating D3D12SDKConfiguration should have failed. So while + // this case shouldn't be hit, fail if it is. 
+ return HRESULT_FROM_WIN32(GetLastError()); + + return ExperimentalFeaturesFunc(0, nullptr, nullptr, nullptr); +} + +static HRESULT +enableExperimentalShaderModels(HMODULE hRuntime, + UUID AdditionalFeatures[] = nullptr, + size_t NumAdditionalFeatures = 0) { + D3D12EnableExperimentalFeaturesFn ExperimentalFeaturesFunc = + (D3D12EnableExperimentalFeaturesFn)GetProcAddress( + hRuntime, "D3D12EnableExperimentalFeatures"); + if (ExperimentalFeaturesFunc == nullptr) + return HRESULT_FROM_WIN32(GetLastError()); + + std::vector Features; + + Features.push_back(D3D12ExperimentalShaderModels); + + if (AdditionalFeatures != nullptr && NumAdditionalFeatures > 0) + Features.insert(Features.end(), AdditionalFeatures, + AdditionalFeatures + NumAdditionalFeatures); + + return ExperimentalFeaturesFunc((UINT)Features.size(), Features.data(), + nullptr, nullptr); +} + +static HRESULT +enableExperimentalShaderModels(UUID AdditionalFeatures[] = nullptr, + size_t NumAdditionalFeatures = 0) { + HMODULE Runtime = LoadLibraryW(L"d3d12.dll"); + if (Runtime == NULL) + return E_FAIL; + return enableExperimentalShaderModels(Runtime, AdditionalFeatures, + NumAdditionalFeatures); +} + +static HRESULT disableExperimentalShaderModels() { + HMODULE Runtime = LoadLibraryW(L"d3d12.dll"); + if (Runtime == NULL) + return E_FAIL; + + D3D12EnableExperimentalFeaturesFn ExperimentalFeaturesFunc = + (D3D12EnableExperimentalFeaturesFn)GetProcAddress( + Runtime, "D3D12EnableExperimentalFeatures"); + if (ExperimentalFeaturesFunc == nullptr) + return HRESULT_FROM_WIN32(GetLastError()); + + return ExperimentalFeaturesFunc(0, nullptr, nullptr, nullptr); +} + +static HRESULT enableAgilitySDK(HMODULE Runtime) { + // D3D12SDKVersion > 1 will use provided version, otherwise, auto-detect. + // D3D12SDKVersion == 1 means fail if we can't auto-detect. + UINT SDKVersion = 0; + WEX::TestExecution::RuntimeParameters::TryGetValue(L"D3D12SDKVersion", + SDKVersion); + + // SDKPath must be relative path from .exe, which means relative to + // TE.exe location, and must start with ".\\", such as with the + // default: ".\\D3D12\\" + WEX::Common::String SDKPath; + if (SUCCEEDED(WEX::TestExecution::RuntimeParameters::TryGetValue( + L"D3D12SDKPath", SDKPath))) { + // Make sure path ends in backslash + if (!SDKPath.IsEmpty() && SDKPath.Right(1) != "\\") + SDKPath.Append("\\"); + } + + if (SDKPath.IsEmpty()) + SDKPath = L".\\D3D12\\"; + + const bool MustFind = SDKVersion > 0; + if (SDKVersion <= 1) { + // lookup version from D3D12Core.dll + SDKVersion = getD3D12SDKVersion((LPCWSTR)SDKPath); + if (MustFind && SDKVersion == 0) { + hlsl_test::LogErrorFmt(L"Agility SDK not found in relative path: %s", + (LPCWSTR)SDKPath); + return E_FAIL; + } + } + + // Not found, not asked for. + if (SDKVersion == 0) + return S_FALSE; + + HRESULT HR = enableAgilitySDK(Runtime, SDKVersion, (LPCWSTR)SDKPath); + if (FAILED(HR)) { + // If SDKVersion provided, fail if not successful. + // 1 means we should find it, and fill in the version automatically. 
+ if (MustFind) { + hlsl_test::LogErrorFmt( + L"Failed to set Agility SDK version %d at path: %s", SDKVersion, + (LPCWSTR)SDKPath); + return HR; + } + return S_FALSE; + } + if (HR == S_OK) + hlsl_test::LogCommentFmt(L"Agility SDK version set to: %d", SDKVersion); + + return HR; +} + +static HRESULT enableExperimentalMode(HMODULE Runtime) { +#ifdef _FORCE_EXPERIMENTAL_SHADERS + bool ExperimentalShaderModels = true; +#else + bool ExperimentalShaderModels = + hlsl_test::GetTestParamBool(L"ExperimentalShaders"); +#endif // _FORCE_EXPERIMENTAL_SHADERS + + HRESULT HR = S_FALSE; + if (ExperimentalShaderModels) { + HR = enableExperimentalShaderModels(Runtime); + if (SUCCEEDED(HR)) + WEX::Logging::Log::Comment(L"Experimental shader models enabled."); + } + + return HR; +} + +static HRESULT enableDebugLayer() { + // The debug layer does net yet validate DXIL programs that require + // rewriting, but basic logging should work properly. + HRESULT HR = S_FALSE; + if (useDebugIfaces()) { + CComPtr DebugController; + HR = D3D12GetDebugInterface(IID_PPV_ARGS(&DebugController)); + if (SUCCEEDED(HR)) { + DebugController->EnableDebugLayer(); + HR = S_OK; + } + } + return HR; +} + +#endif // HLSLEXECTESTUTILS_H diff --git a/tools/clang/unittests/HLSLExec/LongVectorOpTable.xml b/tools/clang/unittests/HLSLExec/LongVectorOpTable.xml new file mode 100644 index 0000000000..f3b2e62dbc --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LongVectorOpTable.xml @@ -0,0 +1,693 @@ + + + + + + String + + String + String + String + + + + BinaryOpType_ScalarAdd + bool + + + BinaryOpType_Add + bool + + + BinaryOpType_ScalarSubtract + bool + + + BinaryOpType_Subtract + bool + + + + BinaryOpType_ScalarAdd + int16 + + + BinaryOpType_Add + int16 + + + BinaryOpType_ScalarSubtract + int16 + + + BinaryOpType_Subtract + int16 + + + BinaryOpType_ScalarMultiply + int16 + + + BinaryOpType_Multiply + int16 + + + BinaryOpType_ScalarDivide + int16 + + + BinaryOpType_Divide + int16 + + + BinaryOpType_ScalarModulus + int16 + + + BinaryOpType_Modulus + int16 + + + BinaryOpType_ScalarMin + int16 + + + BinaryOpType_Min + int16 + + + BinaryOpType_ScalarMax + int16 + + + BinaryOpType_Max + int16 + + + + BinaryOpType_ScalarAdd + int32 + + + BinaryOpType_Add + int32 + + + BinaryOpType_ScalarSubtract + int32 + + + BinaryOpType_Subtract + int32 + + + BinaryOpType_ScalarMultiply + int32 + + + BinaryOpType_Multiply + int32 + + + BinaryOpType_ScalarDivide + int32 + + + BinaryOpType_Divide + int32 + + + BinaryOpType_ScalarModulus + int32 + + + BinaryOpType_Modulus + int32 + + + BinaryOpType_ScalarMin + int32 + + + BinaryOpType_Min + int32 + + + BinaryOpType_ScalarMax + int32 + + + BinaryOpType_Max + int32 + + + + BinaryOpType_ScalarAdd + int64 + + + BinaryOpType_Add + int64 + + + BinaryOpType_ScalarSubtract + int64 + + + BinaryOpType_Subtract + int64 + + + BinaryOpType_ScalarMultiply + int64 + + + BinaryOpType_Multiply + int64 + + + BinaryOpType_ScalarDivide + int64 + + + BinaryOpType_Divide + int64 + + + BinaryOpType_ScalarModulus + int64 + + + BinaryOpType_Modulus + int64 + + + BinaryOpType_ScalarMin + int64 + + + BinaryOpType_Min + int64 + + + BinaryOpType_ScalarMax + int64 + + + BinaryOpType_Max + int64 + + + + BinaryOpType_ScalarAdd + uint16 + + + BinaryOpType_Add + uint16 + + + BinaryOpType_ScalarSubtract + uint16 + + + BinaryOpType_Subtract + uint16 + + + BinaryOpType_ScalarMultiply + uint16 + + + BinaryOpType_Multiply + uint16 + + + BinaryOpType_ScalarDivide + uint16 + + + BinaryOpType_Divide + uint16 + + + BinaryOpType_ScalarModulus + uint16 + + 
+ BinaryOpType_Modulus + uint16 + + + BinaryOpType_ScalarMin + uint16 + + + BinaryOpType_Min + uint16 + + + BinaryOpType_ScalarMax + uint16 + + + BinaryOpType_Max + uint16 + + + + BinaryOpType_ScalarAdd + uint32 + + + BinaryOpType_Add + uint32 + + + BinaryOpType_ScalarSubtract + uint32 + + + BinaryOpType_Subtract + uint32 + + + BinaryOpType_ScalarMultiply + uint32 + + + BinaryOpType_Multiply + uint32 + + + BinaryOpType_ScalarDivide + uint32 + + + BinaryOpType_Divide + uint32 + + + BinaryOpType_ScalarModulus + uint32 + + + BinaryOpType_Modulus + uint32 + + + BinaryOpType_ScalarMin + uint32 + + + BinaryOpType_Min + uint32 + + + BinaryOpType_ScalarMax + uint32 + + + BinaryOpType_Max + uint32 + + + + BinaryOpType_ScalarAdd + uint64 + + + BinaryOpType_Add + uint64 + + + BinaryOpType_ScalarSubtract + uint64 + + + BinaryOpType_Subtract + uint64 + + + BinaryOpType_ScalarMultiply + uint64 + + + BinaryOpType_Multiply + uint64 + + + BinaryOpType_ScalarDivide + uint64 + + + BinaryOpType_Divide + uint64 + + + BinaryOpType_ScalarModulus + uint64 + + + BinaryOpType_Modulus + uint64 + + + BinaryOpType_ScalarMin + uint64 + + + BinaryOpType_Min + uint64 + + + BinaryOpType_ScalarMax + uint64 + + + BinaryOpType_Max + uint64 + + + + BinaryOpType_ScalarAdd + float16 + + + BinaryOpType_Add + float16 + + + BinaryOpType_ScalarSubtract + float16 + + + BinaryOpType_Subtract + float16 + + + BinaryOpType_ScalarMultiply + float16 + + + BinaryOpType_Multiply + float16 + + + BinaryOpType_ScalarDivide + float16 + + + BinaryOpType_Divide + float16 + + + BinaryOpType_ScalarModulus + float16 + + + BinaryOpType_Modulus + float16 + + + BinaryOpType_ScalarMin + float16 + + + BinaryOpType_Min + float16 + + + BinaryOpType_ScalarMax + float16 + + + BinaryOpType_Max + float16 + + + + BinaryOpType_ScalarAdd + float32 + + + BinaryOpType_Add + float32 + + + BinaryOpType_ScalarSubtract + float32 + + + BinaryOpType_Subtract + float32 + + + BinaryOpType_ScalarMultiply + float32 + + + BinaryOpType_Multiply + float32 + + + BinaryOpType_ScalarDivide + float32 + + + BinaryOpType_Divide + float32 + + + BinaryOpType_ScalarModulus + float32 + + + BinaryOpType_Modulus + float32 + + + BinaryOpType_ScalarMin + float32 + + + BinaryOpType_Min + float32 + + + BinaryOpType_ScalarMax + float32 + + + BinaryOpType_Max + float32 + + + + BinaryOpType_ScalarAdd + float64 + + + BinaryOpType_Add + float64 + + + BinaryOpType_ScalarSubtract + float64 + + + BinaryOpType_Subtract + float64 + + + BinaryOpType_ScalarMultiply + float64 + + + BinaryOpType_Multiply + float64 + + + BinaryOpType_ScalarDivide + float64 + + + BinaryOpType_Divide + float64 + + + BinaryOpType_ScalarMin + float64 + + + BinaryOpType_Min + float64 + + + BinaryOpType_ScalarMax + float64 + + + BinaryOpType_Max + float64 + +
+ + + + String + String + String + + + + UnaryOpType_Initialize + bool + + + + UnaryOpType_Initialize + int16 + + + + UnaryOpType_Initialize + int32 + + + + UnaryOpType_Initialize + int64 + + + + UnaryOpType_Initialize + uint16 + + + + UnaryOpType_Initialize + uint32 + + + + UnaryOpType_Initialize + uint64 + + + + UnaryOpType_Initialize + float16 + + + + UnaryOpType_Initialize + float32 + + + + UnaryOpType_Initialize + float64 + +
+ + + + String + + String + String + + + + TrigonometricOpType_Acos + float16 + TrigonometricInputValueSet_RangeOne + + + TrigonometricOpType_Asin + float16 + TrigonometricInputValueSet_RangeHalfPi + + + TrigonometricOpType_Atan + float16 + TrigonometricInputValueSet_RangeHalfPi + + + TrigonometricOpType_Cos + float16 + + + TrigonometricOpType_Cosh + float16 + + + TrigonometricOpType_Sin + float16 + + + TrigonometricOpType_Sinh + float16 + + + TrigonometricOpType_Tan + float16 + + + TrigonometricOpType_Tanh + float16 + + + + TrigonometricOpType_Acos + float32 + TrigonometricInputValueSet_RangeOne + + + TrigonometricOpType_Asin + float32 + TrigonometricInputValueSet_RangeHalfPi + + + TrigonometricOpType_Atan + float32 + TrigonometricInputValueSet_RangeHalfPi + + + TrigonometricOpType_Cos + float32 + + + TrigonometricOpType_Cosh + float32 + + + TrigonometricOpType_Sin + float32 + + + TrigonometricOpType_Sinh + float32 + + + TrigonometricOpType_Tan + float32 + + + TrigonometricOpType_Tanh + float32 + +
+
diff --git a/tools/clang/unittests/HLSLExec/LongVectorTestData.h b/tools/clang/unittests/HLSLExec/LongVectorTestData.h new file mode 100644 index 0000000000..bc6ea8c7c2 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LongVectorTestData.h @@ -0,0 +1,298 @@ +#ifndef LONGVECTORTESTDATA_H +#define LONGVECTORTESTDATA_H + +#include +#include +#include +#include +#include + +// A helper struct because C++ bools are 1 byte and HLSL bools are 4 bytes. +// Take int32_t as a constuctor argument and convert it to bool when needed. +// Comparisons cast to a bool because we only care if the bool representation is +// true or false. +struct HLSLBool_t { + HLSLBool_t() : Val(0) {} + HLSLBool_t(int32_t Val) : Val(Val) {} + HLSLBool_t(bool Val) : Val(Val) {} + HLSLBool_t(const HLSLBool_t &Other) : Val(Other.Val) {} + + bool operator==(const HLSLBool_t &Other) const { + return static_cast(Val) == static_cast(Other.Val); + } + + bool operator!=(const HLSLBool_t &Other) const { + return static_cast(Val) != static_cast(Other.Val); + } + + bool operator<(const HLSLBool_t &Other) const { return Val < Other.Val; } + + bool operator>(const HLSLBool_t &Other) const { return Val > Other.Val; } + + bool operator<=(const HLSLBool_t &Other) const { return Val <= Other.Val; } + + bool operator>=(const HLSLBool_t &Other) const { return Val >= Other.Val; } + + HLSLBool_t operator*(const HLSLBool_t &Other) const { + return HLSLBool_t(Val * Other.Val); + } + + HLSLBool_t operator+(const HLSLBool_t &Other) const { + return HLSLBool_t(Val + Other.Val); + } + + HLSLBool_t operator-(const HLSLBool_t &Other) const { + return HLSLBool_t(Val - Other.Val); + } + + HLSLBool_t operator/(const HLSLBool_t &Other) const { + return HLSLBool_t(Val / Other.Val); + } + + HLSLBool_t operator%(const HLSLBool_t &Other) const { + return HLSLBool_t(Val % Other.Val); + } + + // So we can construct std::wstrings using std::wostream + friend std::wostream &operator<<(std::wostream &Os, const HLSLBool_t &Obj) { + Os << static_cast(Obj.Val); + return Os; + } + + // So we can construct std::strings using std::ostream + friend std::ostream &operator<<(std::ostream &Os, const HLSLBool_t &Obj) { + Os << static_cast(Obj.Val); + return Os; + } + + int32_t Val = 0; +}; + +// No native float16 type in C++ until C++23 . So we use uint16_t to represent +// it. Simple little wrapping struct to help handle the right behavior. +struct HLSLHalf_t { + HLSLHalf_t() : Val(0) {} + HLSLHalf_t(DirectX::PackedVector::HALF Val) : Val(Val) {} + HLSLHalf_t(const HLSLHalf_t &Other) : Val(Other.Val) {} + HLSLHalf_t(const float F) { + Val = DirectX::PackedVector::XMConvertFloatToHalf(F); + } + HLSLHalf_t(const double D) { + float F = 0.0f; + // We wrap '::max' in () to prevent it from being expanded as a + // macro by the Windows SDK. 
+ if (D >= (std::numeric_limits::max)()) + F = (std::numeric_limits::max)(); + else if (D <= std::numeric_limits::lowest()) + F = std::numeric_limits::lowest(); + else + F = static_cast(D); + + Val = DirectX::PackedVector::XMConvertFloatToHalf(F); + } + HLSLHalf_t(const int I) { + VERIFY_IS_TRUE(I == 0, L"HLSLHalf_t constructor with int override only " + L"meant for cases when initializing to 0."); + const float F = static_cast(I); + Val = DirectX::PackedVector::XMConvertFloatToHalf(F); + } + + // Implicit conversion to float for use with things like std::acos, std::tan, + // etc + operator float() const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val); + } + + bool operator==(const HLSLHalf_t &Other) const { + // Convert to floats to properly handle the '0 == -0' case which must + // compare to true but have different uint16_t values. + // That is, 0 == -0 is true. We store Val as a uint16_t. + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return A == B; + } + + bool operator<(const HLSLHalf_t &Other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val) < + DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + } + + bool operator>(const HLSLHalf_t &Other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val) > + DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + } + + // Used by tolerance checks in the tests. + bool operator>(float F) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + return A > F; + } + + bool operator<(float F) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + return A < F; + } + + bool operator<=(const HLSLHalf_t &Other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val) <= + DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + } + + bool operator>=(const HLSLHalf_t &Other) const { + return DirectX::PackedVector::XMConvertHalfToFloat(Val) >= + DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + } + + bool operator!=(const HLSLHalf_t &Other) const { return Val != Other.Val; } + + HLSLHalf_t operator*(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return HLSLHalf_t(DirectX::PackedVector::XMConvertFloatToHalf(A * B)); + } + + HLSLHalf_t operator+(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return HLSLHalf_t(DirectX::PackedVector::XMConvertFloatToHalf(A + B)); + } + + HLSLHalf_t operator-(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return HLSLHalf_t(DirectX::PackedVector::XMConvertFloatToHalf(A - B)); + } + + HLSLHalf_t operator/(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + return HLSLHalf_t(DirectX::PackedVector::XMConvertFloatToHalf(A / B)); + } + + HLSLHalf_t operator%(const HLSLHalf_t &Other) const { + const float A = DirectX::PackedVector::XMConvertHalfToFloat(Val); + const float B = DirectX::PackedVector::XMConvertHalfToFloat(Other.Val); + const float C = std::fmod(A, B); + return 
HLSLHalf_t(DirectX::PackedVector::XMConvertFloatToHalf(C)); + } + + // So we can construct std::wstrings using std::wostream + friend std::wostream &operator<<(std::wostream &Os, const HLSLHalf_t &Obj) { + Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val); + return Os; + } + + // So we can construct std::wstrings using std::wostream + friend std::ostream &operator<<(std::ostream &Os, const HLSLHalf_t &Obj) { + Os << DirectX::PackedVector::XMConvertHalfToFloat(Obj.Val); + return Os; + } + + // HALF is an alias to uint16_t + DirectX::PackedVector::HALF Val = 0; +}; + +template struct LongVectorTestData { + static const std::map> Data; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", + {false, true, false, false, false, false, true, true, true, true}}, + {L"DefaultInputValueSet2", + {true, false, false, false, false, true, true, true, false, false}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", {-6, 1, 7, 3, 8, 4, -3, 8, 8, -2}}, + {L"DefaultInputValueSet2", {5, -6, -3, -2, 9, 3, 1, -3, -7, 2}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", {-6, 1, 7, 3, 8, 4, -3, 8, 8, -2}}, + {L"DefaultInputValueSet2", {5, -6, -3, -2, 9, 3, 1, -3, -7, 2}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", {-6, 11, 7, 3, 8, 4, -3, 8, 8, -2}}, + {L"DefaultInputValueSet2", {5, -1337, -3, -2, 9, 3, 1, -3, 501, 2}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", {1, 699, 3, 1023, 5, 6, 0, 8, 9, 10}}, + {L"DefaultInputValueSet2", {2, 111, 3, 4, 5, 9, 21, 8, 9, 10}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", {1, 2, 3, 4, 5, 0, 7, 8, 9, 10}}, + {L"DefaultInputValueSet2", {1, 2, 3, 4, 5, 6, 7, 8, 9, 10}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", {1, 2, 3, 4, 5, 0, 7, 1000, 9, 10}}, + {L"DefaultInputValueSet2", {1, 2, 1337, 4, 5, 6, 7, 8, 9, 10}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", + {-1.0, -1.0, 1.0, -0.01, 1.0, -0.01, 1.0, -0.01, 1.0, -0.01}}, + {L"DefaultInputValueSet2", + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}}, + {L"DefaultClampArgs", {-1.0, 1.0}}, // Min, Max values for clamp + // Range [ -pi/2, pi/2] + {L"TrigonometricInputValueSet_RangeHalfPi", + {-1.073, 0.044, -1.047, 0.313, 1.447, -0.865, 1.364, -0.715, -0.800, + 0.541}}, + {L"TrigonometricInputValueSet_RangeOne", + {0.331, 0.727, -0.957, 0.677, -0.025, 0.495, 0.855, -0.673, -0.678, + -0.905}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const std::map> Data = { + {L"DefaultInputValueSet1", + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}}, + {L"DefaultInputValueSet2", + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}}, + // Range [ -pi/2, pi/2] + {L"TrigonometricInputValueSet_RangeHalfPi", + {0.315f, -0.316f, 1.409f, -0.09f, -1.569f, 1.302f, -0.326f, 0.781f, + -1.235f, 0.623f}}, + {L"TrigonometricInputValueSet_RangeOne", + {0.727f, 0.331f, -0.957f, 0.677f, -0.025f, 0.495f, 0.855f, -0.673f, + -0.678f, -0.905f}}, + }; +}; + +template <> struct LongVectorTestData { + inline static const 
std::map> Data = { + {L"DefaultInputValueSet1", + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}}, + {L"DefaultInputValueSet2", + {1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0, 1.0, -1.0}}, + // Range [ -pi/2, pi/2] + {L"TrigonometricInputValueSet_RangeHalfPi", + {0.807, 0.605, 1.317, 0.188, 1.566, -1.507, 0.67, -1.553, 0.194, + -0.883}}, + {L"TrigonometricInputValueSet_RangeOne", + {0.331, 0.277, -0.957, 0.677, -0.025, 0.495, 0.855, -0.673, -0.678, + -0.905}}}; +}; + +#endif // LONGVECTORTESTDATA_H diff --git a/tools/clang/unittests/HLSLExec/LongVectors.cpp b/tools/clang/unittests/HLSLExec/LongVectors.cpp new file mode 100644 index 0000000000..b9e79cfc5e --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LongVectors.cpp @@ -0,0 +1,341 @@ +#include "LongVectors.h" +#include "HlslExecTestUtils.h" +#include + +LongVector::BinaryOpType +LongVector::getBinaryOpType(const std::wstring &OpTypeString) { + return getLongVectorOpType( + binaryOpTypeStringToEnumMap, OpTypeString, + std::size(binaryOpTypeStringToEnumMap)); +} + +LongVector::UnaryOpType +LongVector::getUnaryOpType(const std::wstring &OpTypeString) { + return getLongVectorOpType( + unaryOpTypeStringToEnumMap, OpTypeString, + std::size(unaryOpTypeStringToEnumMap)); +} + +LongVector::TrigonometricOpType +LongVector::getTrigonometricOpType(const std::wstring &OpTypeString) { + return getLongVectorOpType( + trigonometricOpTypeStringToEnumMap, OpTypeString, + std::size(trigonometricOpTypeStringToEnumMap)); +} + +// These are helper arrays to be used with the TableParameterHandler that parses +// the LongVectorOpTable.xml file for us. +static TableParameter BinaryOpParameters[] = { + {L"DataType", TableParameter::STRING, true}, + {L"OpTypeEnum", TableParameter::STRING, true}, + {L"InputValueSetName1", TableParameter::STRING, false}, + {L"InputValueSetName2", TableParameter::STRING, false}, +}; + +static TableParameter UnaryOpParameters[] = { + {L"DataType", TableParameter::STRING, true}, + {L"OpTypeEnum", TableParameter::STRING, true}, + {L"InputValueSetName1", TableParameter::STRING, false}, +}; + +bool LongVector::OpTest::classSetup() { + // Run this only once. + if (!Initialized) { + Initialized = true; + + HMODULE Runtime = LoadLibraryW(L"d3d12.dll"); + if (Runtime == NULL) + return false; + // Do not: FreeLibrary(hRuntime); + // If we actually free the library, it defeats the purpose of + // enableAgilitySDK and enableExperimentalMode. 
+ + HRESULT HR; + HR = enableAgilitySDK(Runtime); + + if (FAILED(HR)) + hlsl_test::LogCommentFmt(L"Unable to enable Agility SDK - 0x%08x.", HR); + else if (HR == S_FALSE) + hlsl_test::LogCommentFmt(L"Agility SDK not enabled."); + else + hlsl_test::LogCommentFmt(L"Agility SDK enabled."); + + HR = enableExperimentalMode(Runtime); + if (FAILED(HR)) + hlsl_test::LogCommentFmt( + L"Unable to enable shader experimental mode - 0x%08x.", HR); + else if (HR == S_FALSE) + hlsl_test::LogCommentFmt(L"Experimental mode not enabled."); + else + hlsl_test::LogCommentFmt(L"Experimental mode enabled."); + + HR = enableDebugLayer(); + if (FAILED(HR)) + hlsl_test::LogCommentFmt(L"Unable to enable debug layer - 0x%08x.", HR); + else if (HR == S_FALSE) + hlsl_test::LogCommentFmt(L"Debug layer not enabled."); + else + hlsl_test::LogCommentFmt(L"Debug layer enabled."); + } + + return true; +} + +TEST_F(LongVector::OpTest, binaryOpTest) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + using namespace WEX::Common; + + const int TableSize = sizeof(BinaryOpParameters) / sizeof(TableParameter); + TableParameterHandler Handler(BinaryOpParameters, TableSize); + + std::wstring DataType(Handler.GetTableParamByName(L"DataType")->m_str); + std::wstring OpTypeString(Handler.GetTableParamByName(L"OpTypeEnum")->m_str); + + auto OpType = LongVector::getBinaryOpType(OpTypeString); + dispatchTestByDataType(OpType, DataType, Handler); +} + +TEST_F(LongVector::OpTest, trigonometricOpTest) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + const int TableSize = sizeof(UnaryOpParameters) / sizeof(TableParameter); + TableParameterHandler Handler(UnaryOpParameters, TableSize); + + std::wstring DataType(Handler.GetTableParamByName(L"DataType")->m_str); + std::wstring OpTypeString(Handler.GetTableParamByName(L"OpTypeEnum")->m_str); + + auto OpType = LongVector::getTrigonometricOpType(OpTypeString); + dispatchTestByDataType(OpType, DataType, Handler); +} + +TEST_F(LongVector::OpTest, unaryOpTest) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + const int TableSize = sizeof(UnaryOpParameters) / sizeof(TableParameter); + TableParameterHandler Handler(UnaryOpParameters, TableSize); + + std::wstring DataType(Handler.GetTableParamByName(L"DataType")->m_str); + std::wstring OpTypeString(Handler.GetTableParamByName(L"OpTypeEnum")->m_str); + + auto OpType = LongVector::getUnaryOpType(OpTypeString); + dispatchTestByDataType(OpType, DataType, Handler); +} + +template +void LongVector::OpTest::dispatchTestByDataType( + LongVectorOpTypeT OpType, std::wstring DataType, + TableParameterHandler &Handler) { + using namespace WEX::Common; + + if (DataType == L"bool") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"int16") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"int32") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"int64") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"uint16") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"uint32") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"uint64") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"float16") + dispatchTestByVectorSize(OpType, Handler); + else if (DataType == L"float32") + dispatchTestByVectorSize(OpType, Handler); + else if 
(DataType == L"float64") + dispatchTestByVectorSize(OpType, Handler); + else + VERIFY_FAIL( + String().Format(L"DataType: %s is not recognized.", DataType.c_str())); +} + +template +void LongVector::OpTest::dispatchTestByVectorSize( + LongVectorOpTypeT opType, TableParameterHandler &Handler) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + LongVector::TestConfig TestConfig(opType); + + // InputValueSetName1 is optional. So the string may be empty. An empty + // string will result in the default value set for this DataType being used. + std::wstring InputValueSet1( + Handler.GetTableParamByName(L"InputValueSetName1")->m_str); + if (!InputValueSet1.empty()) + TestConfig.setInputValueSet1(InputValueSet1); + + // InputValueSetName2 is optional. So the string may be empty. An empty + // string will result in the default value set for this DataType being used. + if (TestConfig.isBinaryOp()) { + std::wstring InputValueSet2( + Handler.GetTableParamByName(L"InputValueSetName2")->m_str); + if (!InputValueSet2.empty()) + TestConfig.setInputValueSet2(InputValueSet2); + } + + std::vector InputVectorSizes = {3, 4, 5, 16, 17, 35, 100, 256, 1024}; + for (auto SizeToTest : InputVectorSizes) { + testBaseMethod(TestConfig, SizeToTest); + } +} + +template +void LongVector::OpTest::testBaseMethod( + LongVector::TestConfig &TestConfig, + size_t VectorSizeToTest) { + WEX::TestExecution::SetVerifyOutput verifySettings( + WEX::TestExecution::VerifyOutputSettings::LogOnlyFailures); + + hlsl_test::LogCommentFmt(L"Running LongVectorOpTestBase<%S, %zu>", + typeid(DataTypeT).name(), VectorSizeToTest); + + bool LogInputs = false; + WEX::TestExecution::RuntimeParameters::TryGetValue(L"LongVectorLogInputs", + LogInputs); + + CComPtr D3DDevice; + if (!createDevice(&D3DDevice, ExecTestUtils::D3D_SHADER_MODEL_6_9, false)) { +#ifdef _HLK_CONF + LOG_ERROR_FMT_THROW( + L"Device does not support SM 6.9. Can't run these tests."); +#else + WEX::Logging::Log::Comment( + "Device does not support SM 6.9. Can't run these tests."); + WEX::Logging::Log::Result(WEX::Logging::TestResults::Skipped); + return; +#endif + } + + std::vector InputVector1; + InputVector1.reserve(VectorSizeToTest); + std::vector InputVector2; // May be unused, but must be defined. + InputVector2.reserve(VectorSizeToTest); + std::vector ScalarInput; // May be unused, but must be defined. + const bool IsVectorBinaryOp = + TestConfig.isBinaryOp() && !TestConfig.isScalarOp(); + + std::vector InputVector1ValueSet = TestConfig.getInputValueSet1(); + std::vector InputVector2ValueSet = + TestConfig.isBinaryOp() ? TestConfig.getInputValueSet2() + : std::vector(); + + if (TestConfig.isScalarOp()) + // Scalar ops are always binary ops. So InputVector2ValueSet is initialized + // with values above. + ScalarInput.push_back(InputVector2ValueSet[0]); + + // Fill the input vectors with values from the value set. Repeat the values + // when we reach the end of the value set. 
+ for (size_t Index = 0; Index < VectorSizeToTest; Index++) { + InputVector1.push_back( + InputVector1ValueSet[Index % InputVector1ValueSet.size()]); + + if (IsVectorBinaryOp) + InputVector2.push_back( + InputVector2ValueSet[Index % InputVector2ValueSet.size()]); + } + + std::vector ExpectedVector; + ExpectedVector.reserve(VectorSizeToTest); + if (IsVectorBinaryOp) + ExpectedVector = + computeExpectedValues(InputVector1, InputVector2, TestConfig); + else if (TestConfig.isScalarOp()) + ExpectedVector = + computeExpectedValues(InputVector1, ScalarInput[0], TestConfig); + else // Must be a unary op + ExpectedVector = computeExpectedValues(InputVector1, TestConfig); + + if (LogInputs) { + logLongVector(InputVector1, L"InputVector1"); + + if (IsVectorBinaryOp) + logLongVector(InputVector2, L"InputVector2"); + else if (TestConfig.isScalarOp()) + logLongVector(ScalarInput, L"ScalarInput"); + } + + // We have to construct the string outside of the lambda. Otherwise it's + // cleaned up when the lambda finishes executing but before the shader runs. + std::string CompilerOptionsString = + TestConfig.getCompilerOptionsString(VectorSizeToTest); + + // The name of the shader we want to use in ShaderOpArith.xml. Could also add + // logic to set this name in ShaderOpArithTable.xml so we can use different + // shaders for different tests. + LPCSTR ShaderName = "LongVectorOp"; + // ShaderOpArith.xml defines the input/output resources and the shader source. + CComPtr TestXML; + readHlslDataIntoNewStream(L"ShaderOpArith.xml", &TestXML, DxcDllSupport); + + // RunShaderOpTest is a helper function that handles resource creation + // and setup. It also handles the shader compilation and execution. It takes a + // callback that is called when the shader is compiled, but before it is + // executed. + std::shared_ptr TestResult = st::RunShaderOpTest( + D3DDevice, DxcDllSupport, TestXML, ShaderName, + [&](LPCSTR Name, std::vector &ShaderData, st::ShaderOp *ShaderOp) { + hlsl_test::LogCommentFmt(L"RunShaderOpTest CallBack. Resource Name: %S", + Name); + + // This callback is called once for each resource defined for + // "LongVectorOp" in ShaderOpArith.xml. All callbacks are fired for each + // resource. We determine whether they are applicable to the test case + // when they run. + + // Process the callback for the OutputVector resource. + if (0 == _stricmp(Name, "OutputVector")) { + // We only need to set the compiler options string once. So this is a + // convenient place to do it. + ShaderOp->Shaders.at(0).Arguments = CompilerOptionsString.c_str(); + + return; + } + + // Process the callback for the InputFuncArgs resource. + if (0 == _stricmp(Name, "InputFuncArgs")) { + if (TestConfig.isScalarOp()) + fillShaderBufferFromLongVectorData(ShaderData, + ScalarInput); + return; + } + + // Process the callback for the InputVector1 resource. + if (0 == _stricmp(Name, "InputVector1")) { + fillShaderBufferFromLongVectorData(ShaderData, + InputVector1); + return; + } + + // Process the callback for the InputVector2 resource. + if (0 == _stricmp(Name, "InputVector2")) { + if (IsVectorBinaryOp) + fillShaderBufferFromLongVectorData(ShaderData, + InputVector2); + + return; + } + + LOG_ERROR_FMT_THROW( + L"RunShaderOpTest CallBack. Unexpected Resource Name: %S", Name); + }); + + // Map the data from GPU to CPU memory so we can verify our expectations. 
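// The "OutputVector" read-back below returns the contents of the output UAV
// declared for LongVectorOp in ShaderOpArith.xml; doVectorsMatch then
// compares it element-wise against ExpectedVector using the tolerance and
// validation mode (epsilon or ULP) chosen by the TestConfig constructor for
// this op and data type.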
+ MappedData ShaderOutData; + TestResult->Test->GetReadBackData("OutputVector", &ShaderOutData); + + std::vector OutputVector; + fillLongVectorDataFromShaderBuffer(ShaderOutData, OutputVector, + VectorSizeToTest); + + VERIFY_SUCCEEDED(doVectorsMatch(OutputVector, ExpectedVector, + TestConfig.getTolerance(), + TestConfig.getValidationType())); +} diff --git a/tools/clang/unittests/HLSLExec/LongVectors.h b/tools/clang/unittests/HLSLExec/LongVectors.h new file mode 100644 index 0000000000..0e046d1966 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LongVectors.h @@ -0,0 +1,336 @@ +#ifndef LONGVECTORS_H +#define LONGVECTORS_H + +#include +#include +#include +#include +#include + +#include +#include + +#include + +#include "LongVectorTestData.h" +#include "ShaderOpTest.h" +#include "TableParameterHandler.h" +#include "dxc/Support/WinIncludes.h" +#include "dxc/Support/dxcapi.use.h" +#include "dxc/Test/HlslTestUtils.h" + +namespace LongVector { +template +class TestConfig; // Forward declaration + +class OpTest { +public: + BEGIN_TEST_CLASS(OpTest) + END_TEST_CLASS() + + TEST_CLASS_SETUP(classSetup); + + BEGIN_TEST_METHOD(binaryOpTest) + TEST_METHOD_PROPERTY(L"DataSource", + L"Table:LongVectorOpTable.xml#BinaryOpTable") + END_TEST_METHOD() + + BEGIN_TEST_METHOD(trigonometricOpTest) + TEST_METHOD_PROPERTY(L"DataSource", + L"Table:LongVectorOpTable.xml#TrigonometricOpTable") + END_TEST_METHOD() + + BEGIN_TEST_METHOD(unaryOpTest) + TEST_METHOD_PROPERTY(L"DataSource", + L"Table:LongVectorOpTable.xml#UnaryOpTable") + END_TEST_METHOD() + + template + void dispatchTestByDataType(LongVectorOpTypeT OpType, std::wstring DataType, + TableParameterHandler &Handler); + + template + void dispatchTestByVectorSize(LongVectorOpTypeT OpType, + TableParameterHandler &Handler); + + template + void testBaseMethod( + LongVector::TestConfig &TestConfig, + size_t VectorSizeToTest); + +private: + dxc::DxcDllSupport DxcDllSupport; + bool Initialized = false; +}; + +template +void fillShaderBufferFromLongVectorData(std::vector &ShaderBuffer, + std::vector &TestData); + +template +void fillLongVectorDataFromShaderBuffer(MappedData &ShaderBuffer, + std::vector &TestData, + size_t NumElements); + +template constexpr bool isFloatingPointType() { + return std::is_same_v || + std::is_same_v || + std::is_same_v; +} + +struct LongVectorOpTypeStringToEnumValue { + std::wstring OpTypeString; + uint32_t OpTypeValue; +}; + +template +DataTypeT getLongVectorOpType(const LongVectorOpTypeStringToEnumValue *Values, + const std::wstring &OpTypeString, + std::size_t Length); + +enum ValidationType { + ValidationType_Epsilon, + ValidationType_Ulp, +}; + +enum BasicOpType { + BasicOpType_Binary, + BasicOpType_Unary, + BasicOpType_ScalarBinary, + BasicOpType_EnumValueCount +}; + +enum BinaryOpType { + BinaryOpType_ScalarAdd, + BinaryOpType_ScalarMultiply, + BinaryOpType_ScalarSubtract, + BinaryOpType_ScalarDivide, + BinaryOpType_ScalarModulus, + BinaryOpType_Multiply, + BinaryOpType_Add, + BinaryOpType_Subtract, + BinaryOpType_Divide, + BinaryOpType_Modulus, + BinaryOpType_Min, + BinaryOpType_Max, + BinaryOpType_ScalarMin, + BinaryOpType_ScalarMax, + BinaryOpType_EnumValueCount +}; + +static const LongVectorOpTypeStringToEnumValue binaryOpTypeStringToEnumMap[] = { + {L"BinaryOpType_ScalarAdd", BinaryOpType_ScalarAdd}, + {L"BinaryOpType_ScalarMultiply", BinaryOpType_ScalarMultiply}, + {L"BinaryOpType_ScalarSubtract", BinaryOpType_ScalarSubtract}, + {L"BinaryOpType_ScalarDivide", BinaryOpType_ScalarDivide}, + {L"BinaryOpType_ScalarModulus", 
BinaryOpType_ScalarModulus}, + {L"BinaryOpType_Add", BinaryOpType_Add}, + {L"BinaryOpType_Multiply", BinaryOpType_Multiply}, + {L"BinaryOpType_Subtract", BinaryOpType_Subtract}, + {L"BinaryOpType_Divide", BinaryOpType_Divide}, + {L"BinaryOpType_Modulus", BinaryOpType_Modulus}, + {L"BinaryOpType_Min", BinaryOpType_Min}, + {L"BinaryOpType_Max", BinaryOpType_Max}, + {L"BinaryOpType_ScalarMin", BinaryOpType_ScalarMin}, + {L"BinaryOpType_ScalarMax", BinaryOpType_ScalarMax}, +}; + +static_assert(_countof(binaryOpTypeStringToEnumMap) == + BinaryOpType_EnumValueCount, + "binaryOpTypeStringToEnumMap size mismatch. Did you " + "add a new enum value?"); + +BinaryOpType getBinaryOpType(const std::wstring &OpTypeString); + +enum UnaryOpType { UnaryOpType_Initialize, UnaryOpType_EnumValueCount }; + +static const LongVectorOpTypeStringToEnumValue unaryOpTypeStringToEnumMap[] = { + {L"UnaryOpType_Initialize", UnaryOpType_Initialize}, +}; + +static_assert(_countof(unaryOpTypeStringToEnumMap) == + UnaryOpType_EnumValueCount, + "unaryOpTypeStringToEnumMap size mismatch. Did you add " + "a new enum value?"); + +UnaryOpType getUnaryOpType(const std::wstring &OpTypeString); + +enum TrigonometricOpType { + TrigonometricOpType_Acos, + TrigonometricOpType_Asin, + TrigonometricOpType_Atan, + TrigonometricOpType_Cos, + TrigonometricOpType_Cosh, + TrigonometricOpType_Sin, + TrigonometricOpType_Sinh, + TrigonometricOpType_Tan, + TrigonometricOpType_Tanh, + TrigonometricOpType_EnumValueCount +}; + +static const LongVectorOpTypeStringToEnumValue + trigonometricOpTypeStringToEnumMap[] = { + {L"TrigonometricOpType_Acos", TrigonometricOpType_Acos}, + {L"TrigonometricOpType_Asin", TrigonometricOpType_Asin}, + {L"TrigonometricOpType_Atan", TrigonometricOpType_Atan}, + {L"TrigonometricOpType_Cos", TrigonometricOpType_Cos}, + {L"TrigonometricOpType_Cosh", TrigonometricOpType_Cosh}, + {L"TrigonometricOpType_Sin", TrigonometricOpType_Sin}, + {L"TrigonometricOpType_Sinh", TrigonometricOpType_Sinh}, + {L"TrigonometricOpType_Tan", TrigonometricOpType_Tan}, + {L"TrigonometricOpType_Tanh", TrigonometricOpType_Tanh}, +}; + +static_assert(_countof(trigonometricOpTypeStringToEnumMap) == + TrigonometricOpType_EnumValueCount, + "trigonometricOpTypeStringToEnumMap size mismatch. Did you add " + "a new enum value?"); + +TrigonometricOpType getTrigonometricOpType(const std::wstring &OpTypeString); + +template +std::vector getInputValueSetByKey(const std::wstring &Key, + bool LogKey = true) { + if (LogKey) + WEX::Logging::Log::Comment( + WEX::Common::String().Format(L"Using Value Set Key: %s", Key.c_str())); + return std::vector(LongVectorTestData::Data.at(Key)); +} + +template +DataTypeT mod(const DataTypeT &A, const DataTypeT &B); + +template struct TestConfigTraits { + TestConfigTraits(LongVectorOpTypeT OpType) : OpType(OpType) {} + // LongVectorOpTypeT* Enum values. We don't use a UINT because + // we want the type data. 
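// For example, a TestConfig built from a BinaryOpType keeps that enum value
// here, so computeExpectedValue() can static_cast it straight back to
// BinaryOpType rather than round-tripping through a plain integer.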
+ LongVectorOpTypeT OpType; +}; + +template +bool doValuesMatch(DataTypeT A, DataTypeT B, float Tolerance, ValidationType); +bool doValuesMatch(HLSLBool_t A, HLSLBool_t B, float, ValidationType); +bool doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, float Tolerance, + ValidationType ValidationType); +bool doValuesMatch(float A, float B, float Tolerance, + ValidationType ValidationType); +bool doValuesMatch(double A, double B, float Tolerance, + ValidationType ValidationType); + +template +bool doVectorsMatch(const std::vector &ActualValues, + const std::vector &ExpectedValues, + float Tolerance, ValidationType ValidationType); +// Binary ops +template +std::vector +computeExpectedValues(const std::vector &InputVector1, + const std::vector &InputVector2, + const TestConfig &Config); + +// Binary scalar ops +template +std::vector +computeExpectedValues(const std::vector &InputVector1, + const DataTypeT &ScalarInput, + const TestConfig &Config); + +// Unary ops +template +std::vector +computeExpectedValues(const std::vector &InputVector1, + const TestConfig &Config); + +template +void logLongVector(const std::vector &Values, + const std::wstring &Name); + +// Used to pass into LongVectorOpTestBase +template class TestConfig { +public: + TestConfig() = default; + + TestConfig(UnaryOpType OpType); + TestConfig(BinaryOpType OpType); + TestConfig(TrigonometricOpType OpType); + + bool isBinaryOp() const { + return BasicOpType == LongVector::BasicOpType_Binary || + BasicOpType == LongVector::BasicOpType_ScalarBinary; + } + + bool isUnaryOp() const { + return BasicOpType == LongVector::BasicOpType_Unary; + } + + bool isScalarOp() const { + return BasicOpType == LongVector::BasicOpType_ScalarBinary; + } + + bool hasFunctionDefinition() const; + std::string getOPERAND2String() const; + + // A helper to get the hlsl type as a string for a given C++ type. + // Used in the long vector tests. 
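// For example, HLSLHalf_t maps to "half". For a 16-element BinaryOpType_Min
// test, getCompilerOptionsString(16) then assembles roughly
//   -DTYPE=half -DNUM=16 -enable-16bit-types -DOPERATOR=,
//   -DOPERAND2=InputVector2 -DIS_BINARY_VECTOR_OP=1 -DFUNC=min
// so the LongVectorOp shader's FUNC(InputVector1 OPERATOR OPERAND2)
// expression expands to min(InputVector1 , InputVector2).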
+ std::string getHLSLTypeString() const; + + DataTypeT computeExpectedValue(const DataTypeT &A, const DataTypeT &B, + BinaryOpType OpType) const; + DataTypeT computeExpectedValue(const DataTypeT &A, const DataTypeT &B) const; + DataTypeT computeExpectedValue(const DataTypeT &A, + TrigonometricOpType OpType) const; + DataTypeT computeExpectedValue(const DataTypeT &A, UnaryOpType OpType) const; + DataTypeT computeExpectedValue(const DataTypeT &A) const; + + void setInputArgsArrayName(const std::wstring &InputArgsArrayName) { + this->InputArgsArrayName = InputArgsArrayName; + } + + void setInputValueSet1(const std::wstring &InputValueSetName) { + this->InputValueSetName1 = InputValueSetName; + } + + void setInputValueSet2(const std::wstring &InputValueSetName) { + this->InputValueSetName2 = InputValueSetName; + } + + std::vector getInputValueSet1() const { + return getInputValueSet(1); + } + + std::vector getInputValueSet2() const { + return getInputValueSet(2); + } + + std::vector getInputArgsArray() const; + + float getTolerance() const { return Tolerance; } + LongVector::ValidationType getValidationType() const { + return ValidationType; + } + + std::string getCompilerOptionsString(size_t VectorSize) const; + +private: + std::vector getInputValueSet(size_t ValueSetIndex) const; + + // To be used for the value of -DOPERATOR + std::string OperatorString; + // To be used for the value of -DFUNC + std::string IntrinsicString; + LongVector::BasicOpType BasicOpType = LongVector::BasicOpType_EnumValueCount; + float Tolerance = 0.0; + LongVector::ValidationType ValidationType = + LongVector::ValidationType::ValidationType_Epsilon; + LongVector::TestConfigTraits OpTypeTraits; + std::wstring InputValueSetName1 = L"DefaultInputValueSet1"; + std::wstring InputValueSetName2 = L"DefaultInputValueSet2"; + // No default args array + std::wstring InputArgsArrayName = L""; +}; // class LongVector::TestConfig + +}; // namespace LongVector + +#include "LongVectors.tpp" + +#endif // LONGVECTORS_H diff --git a/tools/clang/unittests/HLSLExec/LongVectors.tpp b/tools/clang/unittests/HLSLExec/LongVectors.tpp new file mode 100644 index 0000000000..29affa4b2e --- /dev/null +++ b/tools/clang/unittests/HLSLExec/LongVectors.tpp @@ -0,0 +1,650 @@ +template +DataTypeT LongVector::getLongVectorOpType(const LongVectorOpTypeStringToEnumValue *Values, + const std::wstring &OpTypeString, + std::size_t Length) { + for (size_t i = 0; i < Length; i++) { + if (Values[i].OpTypeString == OpTypeString) + return static_cast(Values[i].OpTypeValue); + } + + LOG_ERROR_FMT_THROW(L"Invalid LongVectorOpType string: %s", + OpTypeString.c_str()); + + return static_cast(UINT_MAX); +} + +// Helper to fill the shader buffer based on type. Convenient to be used when +// copying HLSL*_t types so we can copy the underlying type directly instead of +// the struct. +template +void LongVector::fillShaderBufferFromLongVectorData(std::vector &ShaderBuffer, std::vector &TestData) { + + // Note: DataSize for HLSLHalf_t and HLSLBool_t may be larger than the + // underlying type in some cases. Thats fine. Resize just makes sure we have + // enough space. 
+ const size_t NumElements = TestData.size(); + const size_t DataSize = sizeof(DataTypeT) * NumElements; + ShaderBuffer.resize(DataSize); + + if constexpr (std::is_same_v) { + DirectX::PackedVector::HALF *ShaderBufferPtr = + reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < NumElements; ++i) + ShaderBufferPtr[i] = TestData[i].Val; + } else if constexpr (std::is_same_v) { + int32_t *ShaderBufferPtr = reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < NumElements; ++i) + ShaderBufferPtr[i] = TestData[i].Val; + } else { + DataTypeT *ShaderBufferPtr = + reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < NumElements; ++i) + ShaderBufferPtr[i] = TestData[i]; + } +} + +// Helpers so we do the right thing for float types. HLSLHalf_t is handled in an +// operator overload. +template +DataTypeT LongVector::mod(const DataTypeT &A, const DataTypeT &B) { + return A % B; +} + +template <> float LongVector::mod(const float &A, const float &B) { + return std::fmod(A, B); +} + +template <> double LongVector::mod(const double &A, const double &B) { + return std::fmod(A, B); +} + +// Helper to fill the test data from the shader buffer based on type. Convenient +// to be used when copying HLSL*_t types so we can use the underlying type. +template +void LongVector::fillLongVectorDataFromShaderBuffer(MappedData &ShaderBuffer, + std::vector &TestData, + size_t NumElements) { + if constexpr (std::is_same_v) { + DirectX::PackedVector::HALF *ShaderBufferPtr = + reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < NumElements; ++i) + // HLSLHalf_t has a DirectX::PackedVector::HALF based constructor. + TestData.push_back(ShaderBufferPtr[i]); + } else if constexpr (std::is_same_v) { + int32_t *ShaderBufferPtr = reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < NumElements; ++i) + // HLSLBool_t has a int32_t based constructor. + TestData.push_back(ShaderBufferPtr[i]); + } else { + DataTypeT *ShaderBufferPtr = + reinterpret_cast(ShaderBuffer.data()); + for (size_t i = 0; i < NumElements; ++i) + TestData.push_back(ShaderBufferPtr[i]); + } +} + +template +bool LongVector::doValuesMatch(DataTypeT A, DataTypeT B, float Tolerance, + LongVector::ValidationType) { + if (Tolerance == 0.0f) + return A == B; + + DataTypeT Diff = A > B ? A - B : B - A; + return Diff <= Tolerance; +} + +bool LongVector::doValuesMatch(HLSLBool_t A, HLSLBool_t B, float, + LongVector::ValidationType) { + return A == B; +} + +bool LongVector::doValuesMatch(HLSLHalf_t A, HLSLHalf_t B, float Tolerance, + LongVector::ValidationType ValidationType) { + switch (ValidationType) { + case LongVector::ValidationType_Epsilon: + return CompareHalfEpsilon(A.Val, B.Val, Tolerance); + case LongVector::ValidationType_Ulp: + return CompareHalfULP(A.Val, B.Val, Tolerance); + default: + WEX::Logging::Log::Error( + L"Invalid ValidationType. Expecting Epsilon or ULP."); + return false; + } +} + +bool LongVector::doValuesMatch(float A, float B, float Tolerance, + LongVector::ValidationType ValidationType) { + switch (ValidationType) { + case LongVector::ValidationType_Epsilon: + return CompareFloatEpsilon(A, B, Tolerance); + case LongVector::ValidationType_Ulp: { + // Tolerance is in ULPs. Convert to int for the comparison. + const int IntTolerance = static_cast(Tolerance); + return CompareFloatULP(A, B, IntTolerance); + }; + default: + WEX::Logging::Log::Error( + L"Invalid ValidationType. 
Expecting Epsilon or ULP."); + return false; + } +} + +bool LongVector::doValuesMatch(double A, double B, float Tolerance, + LongVector::ValidationType ValidationType) { + switch (ValidationType) { + case LongVector::ValidationType_Epsilon: + return CompareDoubleEpsilon(A, B, Tolerance); + case LongVector::ValidationType_Ulp: { + // Tolerance is in ULPs. Convert to int64_t for the comparison. + const int64_t IntTolerance = static_cast(Tolerance); + return CompareDoubleULP(A, B, IntTolerance); + }; + default: + WEX::Logging::Log::Error( + L"Invalid ValidationType. Expecting Epsilon or ULP."); + return false; + } +} + + +template +bool LongVector::doVectorsMatch(const std::vector &ActualValues, + const std::vector &ExpectedValues, + float Tolerance, + LongVector::ValidationType ValidationType) { + // Stash mismatched indexes for easy failure logging later + std::vector MismatchedIndexes; + VERIFY_IS_TRUE(ActualValues.size() == ExpectedValues.size(), + L"doVectorsMatch() called with mismatched vector sizes."); + for (size_t i = 0; i < ActualValues.size(); ++i) { + if (!doValuesMatch(ActualValues[i], ExpectedValues[i], Tolerance, + ValidationType)) + MismatchedIndexes.push_back(i); + } + + if (MismatchedIndexes.empty()) + return true; + + if (!MismatchedIndexes.empty()) { + for (size_t Index : MismatchedIndexes) { + std::wstringstream Wss(L""); + Wss << std::setprecision(15); // Set precision for floating point types + Wss << L"Mismatch at Index: " << Index; + Wss << L" Actual Value:" << ActualValues[Index] << ","; + Wss << L" Expected Value:" << ExpectedValues[Index]; + WEX::Logging::Log::Error(Wss.str().c_str()); + } + } + + return false; +} + +template +std::vector LongVector::computeExpectedValues( + const std::vector &InputVector1, + const std::vector &InputVector2, + const LongVector::TestConfig &Config) { + + VERIFY_IS_TRUE( + Config.isBinaryOp(), + L"computeExpectedValues() called with a non-binary op config."); + + std::vector ExpectedValues = {}; + + for (size_t i = 0; i < InputVector1.size(); ++i) + ExpectedValues.push_back( + Config.computeExpectedValue(InputVector1[i], InputVector2[i])); + + return ExpectedValues; +} + +template +std::vector LongVector::computeExpectedValues( + const std::vector &InputVector1, const DataTypeT &ScalarInput, + const LongVector::TestConfig &Config) { + + VERIFY_IS_TRUE(Config.isScalarOp(), L"computeExpectedValues() called with a " + L"non-binary non-scalar op config."); + + std::vector ExpectedValues; + + for (size_t i = 0; i < InputVector1.size(); ++i) + ExpectedValues.push_back( + Config.computeExpectedValue(InputVector1[i], ScalarInput)); + + return ExpectedValues; +} + +template +std::vector LongVector::computeExpectedValues( + const std::vector &InputVector1, + const LongVector::TestConfig &Config) { + + VERIFY_IS_TRUE(Config.isUnaryOp(), + L"computeExpectedValues() called with a non-unary op config."); + + std::vector ExpectedValues; + + for (size_t i = 0; i < InputVector1.size(); ++i) + ExpectedValues.push_back(Config.computeExpectedValue(InputVector1[i])); + + return ExpectedValues; +} + +template +void LongVector::logLongVector(const std::vector &Values, + const std::wstring &Name) { + WEX::Logging::Log::Comment( + WEX::Common::String().Format(L"LongVector Name: %s", Name.c_str())); + + const size_t LoggingWidth = 40; + + std::wstringstream Wss(L""); + Wss << L"LongVector Values: "; + Wss << L"["; + const size_t NumElements = Values.size(); + for (size_t i = 0; i < NumElements; i++) { + if (i % LoggingWidth == 0 && i != 0) + Wss << L"\n "; + 
Wss << Values[i]; + if (i != NumElements - 1) + Wss << L", "; + } + Wss << L" ]"; + + WEX::Logging::Log::Comment(Wss.str().c_str()); +} + +template +LongVector::TestConfig::TestConfig(LongVector::UnaryOpType OpType) + : OpTypeTraits(OpType) { + IntrinsicString = ""; + BasicOpType = LongVector::BasicOpType_Unary; + + if (isFloatingPointType()) + Tolerance = 1; + + switch (OpType) { + case LongVector::UnaryOpType_Initialize: + IntrinsicString = "TestInitialize"; + break; + default: + VERIFY_FAIL("Invalid UnaryOpType"); + } +} + +template +LongVector::TestConfig::TestConfig(LongVector::BinaryOpType OpType) + : OpTypeTraits(OpType) { + IntrinsicString = ""; + BasicOpType = LongVector::BasicOpType_Binary; + + if (isFloatingPointType()) + Tolerance = 1; + ValidationType = LongVector::ValidationType_Ulp; + + switch (OpType) { + case LongVector::BinaryOpType_ScalarAdd: + BasicOpType = LongVector::BasicOpType_ScalarBinary; + OperatorString = "+"; + break; + case LongVector::BinaryOpType_ScalarMultiply: + BasicOpType = LongVector::BasicOpType_ScalarBinary; + OperatorString = "*"; + break; + case LongVector::BinaryOpType_ScalarSubtract: + BasicOpType = LongVector::BasicOpType_ScalarBinary; + OperatorString = "-"; + break; + case LongVector::BinaryOpType_ScalarDivide: + BasicOpType = LongVector::BasicOpType_ScalarBinary; + OperatorString = "/"; + break; + case LongVector::BinaryOpType_ScalarModulus: + BasicOpType = LongVector::BasicOpType_ScalarBinary; + OperatorString = "%"; + break; + case LongVector::BinaryOpType_Multiply: + OperatorString = "*"; + break; + case LongVector::BinaryOpType_Add: + OperatorString = "+"; + break; + case LongVector::BinaryOpType_Subtract: + OperatorString = "-"; + break; + case LongVector::BinaryOpType_Divide: + OperatorString = "/"; + break; + case LongVector::BinaryOpType_Modulus: + OperatorString = "%"; + break; + case LongVector::BinaryOpType_Min: + OperatorString = ","; + IntrinsicString = "min"; + break; + case LongVector::BinaryOpType_Max: + OperatorString = ","; + IntrinsicString = "max"; + break; + case LongVector::BinaryOpType_ScalarMin: + BasicOpType = LongVector::BasicOpType_ScalarBinary; + OperatorString = ","; + IntrinsicString = "min"; + break; + case LongVector::BinaryOpType_ScalarMax: + BasicOpType = LongVector::BasicOpType_ScalarBinary; + OperatorString = ","; + IntrinsicString = "max"; + break; + default: + VERIFY_FAIL("Invalid BinaryOpType"); + } +} + +template +LongVector::TestConfig::TestConfig(LongVector::TrigonometricOpType OpType) + : OpTypeTraits(OpType) { + IntrinsicString = ""; + BasicOpType = LongVector::BasicOpType_Unary; + + // All trigonometric ops are floating point types. + // These trig functions are defined to have a max absolute error of 0.0008 + // as per the D3D functional specs. An example with this spec for sin and + // cos is available here: + // https://microsoft.github.io/DirectX-Specs/d3d/archive/D3D11_3_FunctionalSpec.htm#22.10.20 + ValidationType = LongVector::ValidationType_Epsilon; + if (std::is_same_v) + Tolerance = 0.0010f; + else if (std::is_same_v) + Tolerance = 0.0008f; + else + VERIFY_FAIL( + "Invalid type for trigonometric op. 
Expecting half or float."); + + switch (OpType) { + case LongVector::TrigonometricOpType_Acos: + IntrinsicString = "acos"; + break; + case LongVector::TrigonometricOpType_Asin: + IntrinsicString = "asin"; + break; + case LongVector::TrigonometricOpType_Atan: + IntrinsicString = "atan"; + break; + case LongVector::TrigonometricOpType_Cos: + IntrinsicString = "cos"; + break; + case LongVector::TrigonometricOpType_Cosh: + IntrinsicString = "cosh"; + break; + case LongVector::TrigonometricOpType_Sin: + IntrinsicString = "sin"; + break; + case LongVector::TrigonometricOpType_Sinh: + IntrinsicString = "sinh"; + break; + case LongVector::TrigonometricOpType_Tan: + IntrinsicString = "tan"; + break; + case LongVector::TrigonometricOpType_Tanh: + IntrinsicString = "tanh"; + break; + default: + VERIFY_FAIL("Invalid TrigonometricOpType"); + } +} + +template +bool LongVector::TestConfig::hasFunctionDefinition() const { + if constexpr (std::is_same_v) { + if (OpTypeTraits.OpType == LongVector::UnaryOpType_Initialize) + return true; + else + return false; + } + + return false; +} + +template +std::string LongVector::TestConfig::getOPERAND2String() const { + if (hasFunctionDefinition()) { + switch (static_cast(OpTypeTraits.OpType)) { + case LongVector::UnaryOpType_Initialize: + return std::string(" -DFUNC_INITIALIZE=1"); + default: + VERIFY_FAIL("Invalid UnaryOpType"); + } + } + return std::string(""); +} + +template +std::string LongVector::TestConfig::getHLSLTypeString() const { + if (std::is_same_v) + return "bool"; + if (std::is_same_v) + return "half"; + if (std::is_same_v) + return "float"; + if (std::is_same_v) + return "double"; + if (std::is_same_v) + return "int16_t"; + if (std::is_same_v) + return "int"; + if (std::is_same_v) + return "int64_t"; + if (std::is_same_v) + return "uint16_t"; + if (std::is_same_v) + return "uint32_t"; + if (std::is_same_v) + return "uint64_t"; + + std::string ErrStr("getHLSLTypeString() Unsupported type: "); + ErrStr.append(typeid(DataTypeT).name()); + VERIFY_IS_TRUE(false, ErrStr.c_str()); + return "UnknownType"; +} + +template +DataTypeT LongVector::TestConfig::computeExpectedValue(const DataTypeT &A, const DataTypeT &B, + LongVector::BinaryOpType OpType) const { + switch (OpType) { + case LongVector::BinaryOpType_ScalarAdd: + return A + B; + case LongVector::BinaryOpType_ScalarMultiply: + return A * B; + case LongVector::BinaryOpType_ScalarSubtract: + return A - B; + case LongVector::BinaryOpType_ScalarDivide: + return A / B; + case LongVector::BinaryOpType_ScalarModulus: + return mod(A, B); + case LongVector::BinaryOpType_Multiply: + return A * B; + case LongVector::BinaryOpType_Add: + return A + B; + case LongVector::BinaryOpType_Subtract: + return A - B; + case LongVector::BinaryOpType_Divide: + return A / B; + case LongVector::BinaryOpType_Modulus: + return mod(A, B); + case LongVector::BinaryOpType_Min: + // std::max and std::min are wrapped in () to avoid collisions with the // + // macro defintions for min and max in windows.h + return (std::min)(A, B); + case LongVector::BinaryOpType_Max: + return (std::max)(A, B); + case LongVector::BinaryOpType_ScalarMin: + return (std::min)(A, B); + case LongVector::BinaryOpType_ScalarMax: + return (std::max)(A, B); + default: + LOG_ERROR_FMT_THROW(L"Unknown BinaryOpType: %d", OpTypeTraits.OpType); + return DataTypeT(); + } +} + +template +DataTypeT LongVector::TestConfig::computeExpectedValue(const DataTypeT &A, const DataTypeT &B) const { + if(!isBinaryOp()) + LOG_ERROR_FMT_THROW( + L"computeExpectedValue(const 
DataTypeT &A, const DataTypeT &B) called " + L"on a unary op: %d", + OpTypeTraits.OpType); + + return computeExpectedValue(A, B, static_cast(OpTypeTraits.OpType)); +} + + +template +DataTypeT LongVector::TestConfig::computeExpectedValue(const DataTypeT &A, + LongVector::UnaryOpType OpType) const { + switch (OpType) { + case LongVector::UnaryOpType_Initialize: + return A; + default: + LOG_ERROR_FMT_THROW(L"Unknown UnaryOpType :%d", OpTypeTraits.OpType); + return DataTypeT(); + } +} + +template +DataTypeT LongVector::TestConfig::computeExpectedValue(const DataTypeT &A) const { + + if constexpr (std::is_same_v) { + const auto OpType = static_cast(OpTypeTraits.OpType); + // HLSLHalf_t is a struct. We need to call the constructor to get the + // expected value. + return computeExpectedValue(A, OpType); + } + + if constexpr (std::is_same_v) { + const auto OpType = static_cast(OpTypeTraits.OpType); + // HLSLHalf_t is a struct. We need to call the constructor to get the + // expected value. + return computeExpectedValue(A, OpType); + } + + LOG_ERROR_FMT_THROW( + L"computeExpectedValue(const DataType&A) called on an unrecognized binary op: %d", + OpTypeTraits.OpType); + + return DataTypeT(); +} + +template +DataTypeT LongVector::TestConfig::computeExpectedValue(const DataTypeT &A, + LongVector::TrigonometricOpType OpType) const { + // The trig functions are only valid on floating point types. The constexpr in + // this case is a relatively easy and clean way to prevent the compiler from + // erroring out trying to resolve these for the non floating point types. We + // won't use them in the first place. + if constexpr (isFloatingPointType()) { + switch (OpType) { + case LongVector::TrigonometricOpType_Acos: + return std::acos(A); + case LongVector::TrigonometricOpType_Asin: + return std::asin(A); + case LongVector::TrigonometricOpType_Atan: + return std::atan(A); + case LongVector::TrigonometricOpType_Cos: + return std::cos(A); + case LongVector::TrigonometricOpType_Cosh: + return std::cosh(A); + case LongVector::TrigonometricOpType_Sin: + return std::sin(A); + case LongVector::TrigonometricOpType_Sinh: + return std::sinh(A); + case LongVector::TrigonometricOpType_Tan: + return std::tan(A); + case LongVector::TrigonometricOpType_Tanh: + return std::tanh(A); + default: + LOG_ERROR_FMT_THROW(L"Unknown TrigonometricOpType: %d", + OpTypeTraits.OpType); + return DataTypeT(); + } + } + + LOG_ERROR_FMT_THROW(L"ComputeExpectedValue(const DataTypeT &A, " + L"LongVectorOpTypeT OpType) called on a " + L"non-float type: %d", + OpType); + + return DataTypeT(); +} + +template +std::vector LongVector::TestConfig::getInputArgsArray() const { + + std::vector InputArgs; + + std::wstring InputArgsArrayName = this->InputArgsArrayName; + + if (InputArgsArrayName.empty()) + VERIFY_FAIL("No args array name set."); + + if (std::is_same_v && isClampOp()) + VERIFY_FAIL("Clamp is not supported for bools."); + else + return getInputValueSetByKey(InputArgsArrayName, false); + + VERIFY_FAIL("Invalid type for args array."); + return std::vector(); +} + +template +std::string LongVector::TestConfig::getCompilerOptionsString(size_t VectorSize) const { + std::stringstream CompilerOptions(""); + std::string HLSLType = getHLSLTypeString(); + CompilerOptions << "-DTYPE="; + CompilerOptions << HLSLType; + CompilerOptions << " -DNUM="; + CompilerOptions << VectorSize; + const bool Is16BitType = + (HLSLType == "int16_t" || HLSLType == "uint16_t" || HLSLType == "half"); + CompilerOptions << (Is16BitType ? 
" -enable-16bit-types" : ""); + CompilerOptions << " -DOPERATOR="; + CompilerOptions << OperatorString; + + if (isBinaryOp()) { + CompilerOptions << " -DOPERAND2="; + CompilerOptions << (isScalarOp() ? "InputScalar" : "InputVector2"); + + if (isScalarOp()) + CompilerOptions << " -DIS_SCALAR_OP=1"; + else + CompilerOptions << " -DIS_BINARY_VECTOR_OP=1"; + + CompilerOptions << " -DFUNC="; + CompilerOptions << IntrinsicString; + } else { // Unary Op + CompilerOptions << " -DFUNC="; + CompilerOptions << IntrinsicString; + CompilerOptions << " -DOPERAND2="; + CompilerOptions << getOPERAND2String(); + } + + return CompilerOptions.str(); +} + +template +std::vector LongVector::TestConfig::getInputValueSet(size_t ValueSetIndex) const { + if (ValueSetIndex == 2 && !isBinaryOp()) + VERIFY_FAIL("ValueSetindex==2 is only valid for binary ops."); + + std::wstring InputValueSetName = L""; + if (ValueSetIndex == 1) + InputValueSetName = InputValueSetName1; + else if (ValueSetIndex == 2) + InputValueSetName = InputValueSetName2; + else + VERIFY_FAIL("Invalid ValueSetIndex"); + + return getInputValueSetByKey(InputValueSetName); +} diff --git a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml index e768f205f1..dbea8e2aaf 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpArith.xml +++ b/tools/clang/unittests/HLSLExec/ShaderOpArith.xml @@ -1976,7 +1976,7 @@ RWStructuredBuffer g_shareXchg64Buf : register(u5); groupshared uint64_t g_uint64Share[6]; - groupshared int64_t g_sint64Share[3]; + groupshared int64_t g_sint64Share[4]; groupshared uint64_t g_xchg64Share[64]; #define VEC_CALL(op, uav, ix, val) op(uav[ix*stride], val); @@ -2046,7 +2046,7 @@ // Zero-init shared memory, with special cases if (ix < 6) g_uint64Share[ix] = ix == 1 ? 99999999ULL | (99999999ULL << 32) : ix == 3 ? ~0ULL : 0; - if (ix < 3) + if (ix < 4) g_sint64Share[ix] = ix == 1 ? 99999999ULL | (99999999ULL << 32) : 0; if (ix < 64) g_xchg64Share[ix] = 0; @@ -2552,11 +2552,11 @@ void InitSharedMem(uint ix) { // Zero-init shared memory, with special cases - if (ix < 6) + if (ix < 7) g_uintShare[ix] = ix == 1 ? 99999999 : ix == 3 ? -1 : 0; - if (ix < 3) + if (ix < 4) g_sintShare[ix] = ix == 1 ? 
99999999 : 0; - if (ix < 64) + if (ix < 65) g_xchgShare[ix] = 0; GroupMemoryBarrierWithGroupSync(); @@ -3750,4 +3750,71 @@ void MSMain(uint GID : SV_GroupIndex, + + RootFlags(0), UAV(u0), UAV(u1), UAV(u2), + UAV(u3) + + + + + + + + + + + + + + + TestInitialize(vector Vector) + { + vector VectorCopy = Vector; + return VectorCopy; + } + #endif + + RWByteAddressBuffer g_InputFuncArgs : register(u0); + RWByteAddressBuffer g_InputVector1 : register(u1); + RWByteAddressBuffer g_InputVector2 : register(u2); + RWByteAddressBuffer g_OutputVector : register(u3); + [numthreads(1,1,1)] + void main(uint GI : SV_GroupIndex) { + + vector InputVector1 = g_InputVector1.Load< vector >(0); + + #ifdef IS_BINARY_VECTOR_OP + vector InputVector2 = g_InputVector2.Load< vector >(0); + #endif + + #ifdef IS_SCALAR_OP + TYPE InputScalar = g_InputFuncArgs.Load(0); + #endif + + #ifdef FUNC_CLAMP + TYPE Clamp_ArgMin = g_InputFuncArgs.Load(0); + TYPE Clamp_ArgMax = g_InputFuncArgs.Load(sizeof(TYPE)); + vector ClampArgMinMax = {Clamp_ArgMin, Clamp_ArgMax}; + #endif + + vector OutputVector = FUNC(InputVector1 OPERATOR OPERAND2); + + g_OutputVector.Store< vector >(0, OutputVector); + }; + ]]> + + diff --git a/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp b/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp index e6c9b10f6c..60ce3a9241 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp +++ b/tools/clang/unittests/HLSLExec/ShaderOpTest.cpp @@ -10,7 +10,7 @@ /////////////////////////////////////////////////////////////////////////////// // We need to keep & fix these warnings to integrate smoothly with HLK -#pragma warning(error : 4100 4146 4242 4244 4267 4701 4389) +#pragma warning(error : 4100 4242 4244 4267 4701 4389) #include "d3dx12.h" #include @@ -258,6 +258,15 @@ void CommandListRefs::CreateForDevice(ID3D12Device *pDevice, bool compute) { IID_PPV_ARGS(&List))); } +ShaderOpTest::ShaderOpTest() { + m_hFence = CreateEvent(nullptr, FALSE, FALSE, nullptr); + if (m_hFence == nullptr) { + AtlThrow(HRESULT_FROM_WIN32(GetLastError())); + } +} + +ShaderOpTest::~ShaderOpTest() { CloseHandle(m_hFence); } + void ShaderOpTest::CopyBackResources() { CommandListRefs ResCommandList; ResCommandList.CreateForDevice(m_pDevice, m_pShaderOp->IsCompute()); @@ -423,10 +432,6 @@ void ShaderOpTest::CreateDevice() { CHECK_HR(m_pDevice->CreateFence(0, D3D12_FENCE_FLAG_NONE, __uuidof(ID3D12Fence), (void **)&m_pFence)); m_pFence->SetName(L"ShaderOpTest Fence"); - m_hFence = CreateEvent(nullptr, FALSE, FALSE, nullptr); - if (m_hFence == nullptr) { - AtlThrow(HRESULT_FROM_WIN32(GetLastError())); - } } static void InitByteCode(D3D12_SHADER_BYTECODE *pBytecode, ID3D10Blob *pBlob) { @@ -861,6 +866,11 @@ void ShaderOpTest::CreateShaders() { CHECK_HR(pLibrary->CreateBlobWithEncodingFromPinned( pText, (UINT32)strlen(pText), CP_UTF8, &pTextBlob)); CHECK_HR(m_pDxcSupport->CreateInstance(CLSID_DxcCompiler, &pCompiler)); + WEX::Logging::Log::Comment(L"Compiling shader:"); + ShaderOpLogFmt(L"\tTarget profile: %S", S.Target); + if (argumentsWList.size() > 0) { + ShaderOpLogFmt(L"\tArguments: %S", pArguments); + } CHECK_HR(pCompiler->Compile(pTextBlob, nameW, entryPointW, targetW, (LPCWSTR *)argumentsWList.data(), (UINT32)argumentsWList.size(), nullptr, 0, @@ -2747,6 +2757,74 @@ bool ShaderOpParser::ReadAtElementName(IXmlReader *pReader, LPCWSTR pName) { } } +std::shared_ptr +RunShaderOpTestAfterParse(ID3D12Device *pDevice, dxc::DxcDllSupport &support, + LPCSTR pName, + st::ShaderOpTest::TInitCallbackFn pInitCallback, + st::ShaderOpTest::TShaderCallbackFn 
pShaderCallback, + std::shared_ptr ShaderOpSet) { + st::ShaderOp *pShaderOp; + if (pName == nullptr) { + if (ShaderOpSet->ShaderOps.size() != 1) { + VERIFY_FAIL(L"Expected a single shader operation."); + } + pShaderOp = ShaderOpSet->ShaderOps[0].get(); + } else { + pShaderOp = ShaderOpSet->GetShaderOp(pName); + } + if (pShaderOp == nullptr) { + std::string msg = "Unable to find shader op "; + msg += pName; + msg += "; available ops"; + const char sep = ':'; + for (auto &pAvailOp : ShaderOpSet->ShaderOps) { + msg += sep; + msg += pAvailOp->Name ? pAvailOp->Name : "[n/a]"; + } + CA2W msgWide(msg.c_str()); + VERIFY_FAIL(msgWide.m_psz); + } + + // This won't actually be used since we're supplying the device, + // but let's make it consistent. + pShaderOp->UseWarpDevice = hlsl_test::GetTestParamUseWARP(true); + + std::shared_ptr test = std::make_shared(); + test->SetDxcSupport(&support); + test->SetInitCallback(pInitCallback); + test->SetShaderCallback(pShaderCallback); + test->SetDevice(pDevice); + test->RunShaderOp(pShaderOp); + + std::shared_ptr result = + std::make_shared(); + result->ShaderOpSet = ShaderOpSet; + result->Test = test; + result->ShaderOp = pShaderOp; + return result; +} + +std::shared_ptr +RunShaderOpTestAfterParse(ID3D12Device *pDevice, dxc::DxcDllSupport &support, + LPCSTR pName, + st::ShaderOpTest::TInitCallbackFn pInitCallback, + std::shared_ptr ShaderOpSet) { + return RunShaderOpTestAfterParse(pDevice, support, pName, pInitCallback, + nullptr, ShaderOpSet); +} + +std::shared_ptr +RunShaderOpTest(ID3D12Device *pDevice, dxc::DxcDllSupport &support, + IStream *pStream, LPCSTR pName, + st::ShaderOpTest::TInitCallbackFn pInitCallback) { + DXASSERT_NOMSG(pStream != nullptr); + std::shared_ptr ShaderOpSet = + std::make_shared(); + st::ParseShaderOpSetFromStream(pStream, ShaderOpSet.get()); + return RunShaderOpTestAfterParse(pDevice, support, pName, pInitCallback, + ShaderOpSet); +} + #pragma endregion Parsing support } // namespace st diff --git a/tools/clang/unittests/HLSLExec/ShaderOpTest.h b/tools/clang/unittests/HLSLExec/ShaderOpTest.h index e65bd9e4e5..e8298fc8d9 100644 --- a/tools/clang/unittests/HLSLExec/ShaderOpTest.h +++ b/tools/clang/unittests/HLSLExec/ShaderOpTest.h @@ -12,12 +12,12 @@ // results. // // // /////////////////////////////////////////////////////////////////////////////// - -#pragma once - #ifndef __SHADEROPTEST_H__ #define __SHADEROPTEST_H__ +#include +#include +#include #include #include #include @@ -26,7 +26,7 @@ #include // We need to keep & fix these warnings to integrate smoothly with HLK -#pragma warning(error : 4100 4146 4242 4244 4267 4701 4389) +#pragma warning(error : 4100 4242 4244 4267 4701 4389) /////////////////////////////////////////////////////////////////////////////// // Forward declarations. @@ -275,6 +275,9 @@ class ShaderOpTest { typedef std::function TShaderCallbackFn; + + ShaderOpTest(); + ~ShaderOpTest(); void GetPipelineStats(D3D12_QUERY_DATA_PIPELINE_STATISTICS *pStats); void GetReadBackData(LPCSTR pResourceName, MappedData *pData); void RunShaderOp(ShaderOp *pShaderOp); @@ -341,6 +344,32 @@ void ParseShaderOpSetFromStream(IStream *pStream, ShaderOpSet *pShaderOpSet); // Deserialize a ShaderOpSet from an IXmlReader instance. void ParseShaderOpSetFromXml(IXmlReader *pReader, ShaderOpSet *pShaderOpSet); +/////////////////////////////////////////////////////////////////////////////// +// RunShaderOpTest* helper functions. 
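The declarations below capture the pattern the new LongVector tests rely on: parse a ShaderOpSet from an XML stream, run the named shader op with an init callback that fills the input resources, then read back the results. A minimal self-contained sketch of that flow follows; the function name, the std::vector<BYTE> callback parameter, and the local names are illustrative assumptions modeled on the LongVectors.cpp call site rather than part of this header.

    // Illustrative sketch, assuming the callback signature used elsewhere in
    // ShaderOpTest (LPCSTR resource name, byte buffer, ShaderOp pointer).
    static void RunLongVectorOpSketch(ID3D12Device *pDevice,
                                      dxc::DxcDllSupport &Support,
                                      IStream *pXmlStream) {
      // Parse the shader op set from the XML stream and run "LongVectorOp".
      // The callback fires once for each resource named in the XML.
      std::shared_ptr<st::ShaderOpTestResult> Result = st::RunShaderOpTest(
          pDevice, Support, pXmlStream, "LongVectorOp",
          [&](LPCSTR Name, std::vector<BYTE> &Data, st::ShaderOp *Op) {
            // Fill "InputVector1"/"InputVector2" buffers and set compiler
            // arguments on Op->Shaders here, as the LongVector tests do.
          });

      // Map the "OutputVector" UAV back to CPU memory for verification.
      st::MappedData Output;
      Result->Test->GetReadBackData("OutputVector", &Output);
    }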
+struct ShaderOpTestResult { + st::ShaderOp *ShaderOp; + std::shared_ptr ShaderOpSet; + std::shared_ptr Test; +}; + +std::shared_ptr +RunShaderOpTestAfterParse(ID3D12Device *pDevice, dxc::DxcDllSupport &support, + LPCSTR pName, + st::ShaderOpTest::TInitCallbackFn pInitCallback, + st::ShaderOpTest::TShaderCallbackFn pShaderCallback, + std::shared_ptr ShaderOpSet); + +std::shared_ptr +RunShaderOpTestAfterParse(ID3D12Device *pDevice, dxc::DxcDllSupport &support, + LPCSTR pName, + st::ShaderOpTest::TInitCallbackFn pInitCallback, + std::shared_ptr ShaderOpSet); + +std::shared_ptr +RunShaderOpTest(ID3D12Device *pDevice, dxc::DxcDllSupport &support, + IStream *pStream, LPCSTR pName, + st::ShaderOpTest::TInitCallbackFn pInitCallback); + } // namespace st #endif // __SHADEROPTEST_H__ diff --git a/tools/clang/unittests/HLSLExec/TableParameterHandler.cpp b/tools/clang/unittests/HLSLExec/TableParameterHandler.cpp new file mode 100644 index 0000000000..16badb074d --- /dev/null +++ b/tools/clang/unittests/HLSLExec/TableParameterHandler.cpp @@ -0,0 +1,376 @@ +#include "TableParameterHandler.h" +#include "dxc/Test/HlslTestUtils.h" + +TableParameterHandler::TableParameterHandler(TableParameter *pTable, + size_t size) + : m_table(pTable), m_tableSize(size) { + clearTableParameter(); + VERIFY_SUCCEEDED(ParseTableRow()); +} + +TableParameter *TableParameterHandler::GetTableParamByName(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &m_table[i]; + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +void TableParameterHandler::clearTableParameter() { + for (size_t i = 0; i < m_tableSize; ++i) { + m_table[i].m_int32 = 0; + m_table[i].m_uint = 0; + m_table[i].m_double = 0; + m_table[i].m_bool = false; + m_table[i].m_str = WEX::Common::String(); + } +} + +template +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + return nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_int32Table); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_int8Table); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_int16Table); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_uint32Table); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_floatTable); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +template <> +std::vector 
*TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_halfTable); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_doubleTable); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +template <> +std::vector *TableParameterHandler::GetDataArray(LPCWSTR name) { + for (size_t i = 0; i < m_tableSize; ++i) { + if (_wcsicmp(name, m_table[i].m_name) == 0) { + return &(m_table[i].m_boolTable); + } + } + DXASSERT_ARGS(false, "Invalid Table Parameter Name %s", name); + return nullptr; +} + +HRESULT TableParameterHandler::ParseTableRow() { + TableParameter *table = m_table; + for (unsigned int i = 0; i < m_tableSize; ++i) { + switch (table[i].m_type) { + case TableParameter::INT8: + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + table[i].m_int32)) && + table[i].m_required) { + // TryGetValue does not suppport reading from int16 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_int8 = (int8_t)(table[i].m_int32); + break; + case TableParameter::INT16: + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + table[i].m_int32)) && + table[i].m_required) { + // TryGetValue does not suppport reading from int16 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_int16 = (short)(table[i].m_int32); + break; + case TableParameter::INT32: + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + table[i].m_int32)) && + table[i].m_required) { + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + break; + case TableParameter::UINT: + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + table[i].m_uint)) && + table[i].m_required) { + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + break; + case TableParameter::DOUBLE: + if (FAILED(WEX::TestExecution::TestData::TryGetValue( + table[i].m_name, table[i].m_double)) && + table[i].m_required) { + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + break; + case TableParameter::STRING: + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + table[i].m_str)) && + table[i].m_required) { + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + break; + case TableParameter::BOOL: + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + table[i].m_str)) && + table[i].m_bool) { + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + break; + case TableParameter::INT8_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + // TryGetValue does not suppport reading from int8 + table[i].m_int8Table.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_int8Table[j] = (int8_t)tempTable[j]; + } + break; + } + case TableParameter::INT16_TABLE: { + 
WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + // TryGetValue does not suppport reading from int8 + table[i].m_int16Table.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_int16Table[j] = (int16_t)tempTable[j]; + } + break; + } + case TableParameter::INT32_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + // TryGetValue does not suppport reading from int8 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_int32Table.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_int32Table[j] = tempTable[j]; + } + break; + } + case TableParameter::UINT8_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + // TryGetValue does not suppport reading from int8 + table[i].m_int8Table.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_int8Table[j] = (uint8_t)tempTable[j]; + } + break; + } + case TableParameter::UINT16_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + // TryGetValue does not suppport reading from int8 + table[i].m_uint16Table.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_uint16Table[j] = (uint16_t)tempTable[j]; + } + break; + } + case TableParameter::UINT32_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + // TryGetValue does not suppport reading from int8 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_uint32Table.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_uint32Table[j] = tempTable[j]; + } + break; + } + case TableParameter::FLOAT_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + // TryGetValue does not suppport reading from int8 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_floatTable.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + ParseDataToFloat(tempTable[j], table[i].m_floatTable[j]); + } + break; + } + case TableParameter::HALF_TABLE: { + WEX::TestExecution::TestDataArray tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + // TryGetValue does not suppport reading from int8 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_halfTable.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { 
+ uint16_t value = 0; + if (IsHexString(tempTable[j], &value)) { + table[i].m_halfTable[j] = value; + } else { + float val; + ParseDataToFloat(tempTable[j], val); + if (isdenorm(val)) + table[i].m_halfTable[j] = + signbit(val) ? Float16NegDenorm : Float16PosDenorm; + else + table[i].m_halfTable[j] = ConvertFloat32ToFloat16(val); + } + } + break; + } + case TableParameter::DOUBLE_TABLE: { + WEX::TestExecution::TestDataArray<double> tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + // TryGetValue does not support reading from int8 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_doubleTable.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_doubleTable[j] = tempTable[j]; + } + break; + } + case TableParameter::BOOL_TABLE: { + WEX::TestExecution::TestDataArray<bool> tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + // TryGetValue does not support reading from int8 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_boolTable.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_boolTable[j] = tempTable[j]; + } + break; + } + case TableParameter::STRING_TABLE: { + WEX::TestExecution::TestDataArray<WEX::Common::String> tempTable; + if (FAILED(WEX::TestExecution::TestData::TryGetValue(table[i].m_name, + tempTable)) && + table[i].m_required) { + // TryGetValue does not support reading from int8 + hlsl_test::LogErrorFmt(L"Failed to get %s", table[i].m_name); + return E_FAIL; + } + table[i].m_StringTable.resize(tempTable.GetSize()); + for (size_t j = 0, end = tempTable.GetSize(); j != end; ++j) { + table[i].m_StringTable[j] = tempTable[j]; + } + break; + } + default: + DXASSERT_NOMSG("Invalid Parameter Type"); + } + if (errno == ERANGE) { + hlsl_test::LogErrorFmt(L"got out of range value for table %s", + table[i].m_name); + return E_FAIL; + } + } + return S_OK; +} diff --git a/tools/clang/unittests/HLSLExec/TableParameterHandler.h b/tools/clang/unittests/HLSLExec/TableParameterHandler.h new file mode 100644 index 0000000000..eac851a263 --- /dev/null +++ b/tools/clang/unittests/HLSLExec/TableParameterHandler.h @@ -0,0 +1,205 @@ +#ifndef TABLE_PARAMETER_HANDLER_H +#define TABLE_PARAMETER_HANDLER_H + +#include <algorithm> +#include <cfloat> +#include <cmath> +#include <cstdint> +#include <cwchar> +#include <string> +#include <vector> +#include <windows.h> // For LPCWSTR + +#include "dxc/Support/Global.h" // For DXASSERT_ARGS +#include "dxc/Test/HlslTestUtils.h" + +// Parameter representation for TAEF data-driven tests +struct TableParameter { + LPCWSTR m_name; + enum TableParameterType { + INT8, + INT16, + INT32, + UINT, + FLOAT, + HALF, + DOUBLE, + STRING, + BOOL, + INT8_TABLE, + INT16_TABLE, + INT32_TABLE, + FLOAT_TABLE, + HALF_TABLE, + DOUBLE_TABLE, + STRING_TABLE, + UINT8_TABLE, + UINT16_TABLE, + UINT32_TABLE, + BOOL_TABLE + }; + TableParameter(LPCWSTR name, TableParameterType type, bool required) + : m_name(name), m_type(type), m_required(required) {} + TableParameterType m_type; + bool m_required; // required parameter + int8_t m_int8; + int16_t m_int16; + int m_int32; + unsigned int m_uint; + float m_float; + uint16_t m_half; // no such thing as half type in c++.
Use int16 instead + double m_double; + bool m_bool; + WEX::Common::String m_str; + std::vector<int8_t> m_int8Table; + std::vector<int16_t> m_int16Table; + std::vector<int32_t> m_int32Table; + std::vector<uint8_t> m_uint8Table; + std::vector<uint16_t> m_uint16Table; + std::vector<uint32_t> m_uint32Table; + std::vector<float> m_floatTable; + std::vector<uint16_t> m_halfTable; // no such thing as half type in c++ + std::vector<double> m_doubleTable; + std::vector<bool> m_boolTable; + std::vector<WEX::Common::String> m_StringTable; +}; + +class TableParameterHandler { +private: + HRESULT ParseTableRow(); + +public: + TableParameter *m_table; + size_t m_tableSize; + TableParameterHandler(TableParameter *pTable, size_t size); + + TableParameter *GetTableParamByName(LPCWSTR name); + void clearTableParameter(); + + template <class T> std::vector<T> *GetDataArray(LPCWSTR name); +}; + +// Static helpers +static bool IsHexString(PCWSTR str, uint16_t *value) { + std::wstring wString(str); + wString.erase(std::remove(wString.begin(), wString.end(), L' '), + wString.end()); + LPCWSTR wstr = wString.c_str(); + if (wcsncmp(wstr, L"0x", 2) == 0 || wcsncmp(wstr, L"0b", 2) == 0) { + *value = (uint16_t)wcstol(wstr, NULL, 0); + return true; + } + return false; +} + +static HRESULT ParseDataToFloat(PCWSTR str, float &value) { + std::wstring wString(str); + wString.erase(std::remove(wString.begin(), wString.end(), L' '), + wString.end()); + wString.erase(std::remove(wString.begin(), wString.end(), L'\n'), + wString.end()); + PCWSTR wstr = wString.data(); + if (_wcsicmp(wstr, L"NaN") == 0) { + value = NAN; + } else if (_wcsicmp(wstr, L"-inf") == 0) { + value = -(INFINITY); + } else if (_wcsicmp(wstr, L"inf") == 0) { + value = INFINITY; + } else if (_wcsicmp(wstr, L"-denorm") == 0) { + value = -(FLT_MIN / 2); + } else if (_wcsicmp(wstr, L"denorm") == 0) { + value = FLT_MIN / 2; + } else if (_wcsicmp(wstr, L"-0.0f") == 0 || _wcsicmp(wstr, L"-0.0") == 0 || + _wcsicmp(wstr, L"-0") == 0) { + value = -0.0f; + } else if (_wcsicmp(wstr, L"0.0f") == 0 || _wcsicmp(wstr, L"0.0") == 0 || + _wcsicmp(wstr, L"0") == 0) { + value = 0.0f; + } else if (_wcsnicmp(wstr, L"0x", 2) == + 0) { // For hex values, take values literally + unsigned temp_i = std::stoul(wstr, nullptr, 16); + value = (float &)temp_i; + } else { + // otherwise, parse the string as a decimal value + double val = _wtof(wstr); + if (val == 0) { + hlsl_test::LogErrorFmt(L"Failed to parse parameter %s to float", wstr); + return E_FAIL; + } + value = (float)val; + } + return S_OK; +} + +static HRESULT ParseDataToUint(PCWSTR str, unsigned int &value) { + std::wstring wString(str); + wString.erase(std::remove(wString.begin(), wString.end(), L' '), + wString.end()); + PCWSTR wstr = wString.data(); + // treat explicit zero specially, since wcstoul also returns 0 on failure + if (_wcsicmp(wstr, L"0") == 0 || _wcsicmp(wstr, L"0x00000000") == 0) { + value = 0; + return S_OK; + } + wchar_t *end; + unsigned int val = std::wcstoul(wstr, &end, 0); + if (val == 0) { + hlsl_test::LogErrorFmt(L"Failed to parse parameter %s to int", wstr); + return E_FAIL; + } + value = val; + return S_OK; +} + +static HRESULT ParseDataToVectorFloat(PCWSTR str, float *ptr, size_t count) { + std::wstring wstr(str); + size_t curPosition = 0; + // parse a comma-separated list of values + for (size_t i = 0; i < count; ++i) { + size_t nextPosition = wstr.find(L",", curPosition); + if (FAILED(ParseDataToFloat( + wstr.substr(curPosition, nextPosition - curPosition).data(), + *(ptr + i)))) { + return E_FAIL; + } + curPosition = nextPosition + 1; + } + return S_OK; +} + +static HRESULT ParseDataToVectorHalf(PCWSTR str, uint16_t *ptr, size_t count) {
std::wstring wstr(str); + size_t curPosition = 0; + // parse a comma-separated list of values + for (size_t i = 0; i < count; ++i) { + size_t nextPosition = wstr.find(L",", curPosition); + float floatValue; + if (FAILED(ParseDataToFloat( + wstr.substr(curPosition, nextPosition - curPosition).data(), + floatValue))) { + return E_FAIL; + } + *(ptr + i) = ConvertFloat32ToFloat16(floatValue); + curPosition = nextPosition + 1; + } + return S_OK; +} + +static HRESULT ParseDataToVectorUint(PCWSTR str, unsigned int *ptr, + size_t count) { + std::wstring wstr(str); + size_t curPosition = 0; + // parse a comma-separated list of values + for (size_t i = 0; i < count; ++i) { + size_t nextPosition = wstr.find(L",", curPosition); + if (FAILED(ParseDataToUint( + wstr.substr(curPosition, nextPosition - curPosition).data(), + *(ptr + i)))) { + return E_FAIL; + } + curPosition = nextPosition + 1; + } + return S_OK; +} + +#endif // TABLE_PARAMETER_HANDLER_H diff --git a/tools/clang/unittests/HLSLTestLib/FileCheckerTest.cpp b/tools/clang/unittests/HLSLTestLib/FileCheckerTest.cpp index 2c75d45e5e..2d9ee7315d 100644 --- a/tools/clang/unittests/HLSLTestLib/FileCheckerTest.cpp +++ b/tools/clang/unittests/HLSLTestLib/FileCheckerTest.cpp @@ -519,28 +519,21 @@ FileRunCommandPart::RunDxc(dxc::DxcDllSupport &DllSupport, // Convert stage to minimum dxil/validator version: RequiredDxilMajor = std::max(RequiredDxilMajor, (unsigned)6) - 5; - bool bInternalValidator = - opts.SelectValidator == hlsl::options::ValidatorSelection::Internal; bool bValVerExplicit = opts.ValVerMajor != UINT_MAX; - // Normally we must check the validator version as well, but there are - // two scenarios where the validator version doesn't need to be checked - // against the version based on the shader model: - // 1. The test selects internal validator. - // 2. The test explicitly requests a specific validator version. - FileRunCommandResult result = - CheckDxilVer(DllSupport, RequiredDxilMajor, RequiredDxilMinor, - !(bInternalValidator || bValVerExplicit)); + // If validator version set explicitly, skip validator version check when + // checking required version for shader model. + FileRunCommandResult result = CheckDxilVer( + DllSupport, RequiredDxilMajor, RequiredDxilMinor, !bValVerExplicit); if (result.AbortPipeline) return result; // Additionally, if the test explicitly requests a specific non-zero - // validator version, and doesn't select internal validator or disable - // validation, we must check that the validator version is at least as - // high as the requested version. - // When ValVerMajor is 0, validation cannot be run against the module. - if (bValVerExplicit && opts.ValVerMajor != 0 && - !(bInternalValidator || opts.DisableValidation)) + // validator version, and doesn't disable validation, we must check + // that the validator version is at least as high as the requested + // version. When ValVerMajor is 0, validation cannot be run against + // the module.
+ if (bValVerExplicit && opts.ValVerMajor != 0 && !opts.DisableValidation) result = CheckDxilVer(DllSupport, opts.ValVerMajor, opts.ValVerMinor); if (result.AbortPipeline) return result; diff --git a/unittests/ADT/APIntTest.cpp b/unittests/ADT/APIntTest.cpp index ffba7b1633..a15307023e 100644 --- a/unittests/ADT/APIntTest.cpp +++ b/unittests/ADT/APIntTest.cpp @@ -11,6 +11,7 @@ #include "llvm/ADT/SmallString.h" #include "gtest/gtest.h" #include +#include <limits> #include using namespace llvm; @@ -753,7 +754,7 @@ TEST(APIntTest, StringDeath) { #endif TEST(APIntTest, mul_clear) { - APInt ValA(65, -1ULL); + APInt ValA(65, std::numeric_limits<uint64_t>::max()); APInt ValB(65, 4); APInt ValC(65, 0); ValC = ValA * ValB; diff --git a/unittests/ADT/BitVectorTest.cpp b/unittests/ADT/BitVectorTest.cpp index 26f103b3c1..c7de9194c4 100644 --- a/unittests/ADT/BitVectorTest.cpp +++ b/unittests/ADT/BitVectorTest.cpp @@ -12,6 +12,7 @@ #include "llvm/ADT/BitVector.h" #include "llvm/ADT/SmallBitVector.h" #include "gtest/gtest.h" +#include <limits> using namespace llvm; @@ -73,7 +74,8 @@ TYPED_TEST(BitVectorTest, TrivialOperation) { Vec.resize(33, true); Vec.resize(57, false); unsigned Count = 0; - for (unsigned i = Vec.find_first(); i != -1u; i = Vec.find_next(i)) { + for (unsigned i = Vec.find_first(); i != std::numeric_limits<unsigned>::max(); + i = Vec.find_next(i)) { ++Count; EXPECT_TRUE(Vec[i]); EXPECT_TRUE(Vec.test(i)); @@ -103,7 +105,8 @@ TYPED_TEST(BitVectorTest, TrivialOperation) { Vec.resize(91, true); Vec.resize(130, false); Count = 0; - for (unsigned i = Vec.find_first(); i != -1u; i = Vec.find_next(i)) { + for (unsigned i = Vec.find_first(); i != std::numeric_limits<unsigned>::max(); + i = Vec.find_next(i)) { ++Count; EXPECT_TRUE(Vec[i]); EXPECT_TRUE(Vec.test(i)); diff --git a/unittests/Support/DataExtractorTest.cpp b/unittests/Support/DataExtractorTest.cpp index 81de983d22..250b89d696 100644 --- a/unittests/Support/DataExtractorTest.cpp +++ b/unittests/Support/DataExtractorTest.cpp @@ -7,8 +7,9 @@ // //===----------------------------------------------------------------------===// -#include "gtest/gtest.h" #include "llvm/Support/DataExtractor.h" +#include "gtest/gtest.h" +#include <limits> using namespace llvm; namespace { @@ -20,7 +21,8 @@ const char bigleb128data[] = "\xAA\xA9\xFF\xAA\xFF\xAA\xFF\x4A"; TEST(DataExtractorTest, OffsetOverflow) { DataExtractor DE(StringRef(numberData, sizeof(numberData)-1), false, 8); - EXPECT_FALSE(DE.isValidOffsetForDataOfSize(-2U, 5)); + EXPECT_FALSE(DE.isValidOffsetForDataOfSize( + std::numeric_limits<unsigned>::max() - 1, 5)); } TEST(DataExtractorTest, UnsignedNumbers) { diff --git a/utils/TableGen/FixedLenDecoderEmitter.cpp b/utils/TableGen/FixedLenDecoderEmitter.cpp index c5ef9d0e99..d356971f24 100644 --- a/utils/TableGen/FixedLenDecoderEmitter.cpp +++ b/utils/TableGen/FixedLenDecoderEmitter.cpp @@ -547,10 +547,11 @@ void Filter::recurse() { // Delegates to an inferior filter chooser for further processing on this // group of instructions whose segment values are variable. - FilterChooserMap.insert( - std::make_pair(-1U, llvm::make_unique<FilterChooser>( - Owner->AllInstructions, VariableInstructions, - Owner->Operands, BitValueArray, *Owner))); + FilterChooserMap.insert(std::make_pair( + std::numeric_limits<unsigned>::max(), + llvm::make_unique<FilterChooser>(Owner->AllInstructions, + VariableInstructions, Owner->Operands, + BitValueArray, *Owner))); } // No need to recurse for a singleton filtered instruction.
diff --git a/utils/asan/x86_64-pc-linux-gnu.lsan.supp b/utils/asan/x86_64-pc-linux-gnu.lsan.supp new file mode 100644 index 0000000000..3a7725f535 --- /dev/null +++ b/utils/asan/x86_64-pc-linux-gnu.lsan.supp @@ -0,0 +1 @@ +leak:^call_init$ \ No newline at end of file diff --git a/utils/git/requirements_formatting.txt b/utils/git/requirements_formatting.txt index 6f3e07dcf2..2afb003c4f 100644 --- a/utils/git/requirements_formatting.txt +++ b/utils/git/requirements_formatting.txt @@ -42,11 +42,11 @@ pyjwt[crypto]==2.8.0 # via pygithub pynacl==1.5.0 # via pygithub -requests==2.32.0 +requests==2.32.4 # via pygithub toml==0.10.2 # via darker -urllib3==2.2.2 +urllib3==2.5.0 # via requests wrapt==1.15.0 # via deprecated diff --git a/utils/hct/gen_intrin_main.txt b/utils/hct/gen_intrin_main.txt index f1274fd308..f2c0cc5e2e 100644 --- a/utils/hct/gen_intrin_main.txt +++ b/utils/hct/gen_intrin_main.txt @@ -1,9 +1,6 @@ // Copyright (C) Microsoft Corporation. All rights reserved. // This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. // -// Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -// All rights reserved. -// // See hctdb.py for the implementation of intrinsic file processing. // // Intrinsic declarations are grouped into namespaces that @@ -339,9 +336,9 @@ float<4,3> [[rn]] ObjectToWorld4x3(); float<4,3> [[rn]] WorldToObject4x3(); // Packed dot products with accumulate: -$type3 [[rn]] dot4add_u8packed(in uint a, in $type1 b, in uint c); -$type3 [[rn]] dot4add_i8packed(in uint a, in $type1 b, in int c); -$type3 [[rn]] dot2add(in float16_t<2> a, in $type1 b, in float c); +uint [[rn]] dot4add_u8packed(in uint a, in $type1 b, in uint c); +int [[rn]] dot4add_i8packed(in uint a, in $type1 b, in int c); +float [[rn]] dot2add(in float16_t<2> a, in $type1 b, in float c); // Unpacking intrinsics int16_t<4> [[rn]] unpack_s8s16(in p32i8 pk); @@ -383,6 +380,14 @@ void [[]] Barrier(in NodeRecordOrUAV o, in uint SemanticFlags); uint [[]] GetRemainingRecursionLevels(); +void [[min_sm=6.9]] __builtin_MatVecMul(out LinAlg OutputVector, in bool OutputIsUnsigned, in LinAlg InputVector, in bool InputIsUnsigned, in uint InputInterpretation, in ByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint M, in uint K, in uint MatrixLayout, in bool MatrixIsTransposed, in uint MatrixStride); + +void [[min_sm=6.9]] __builtin_MatVecMulAdd(out LinAlg OutputVector, in bool OutputIsUnsigned, in LinAlg InputVector, in bool InputIsUnsigned, in uint InputInterpretation, in ByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint M, in uint K, in uint MatrixLayout, in bool MatrixIsTransposed, in uint MatrixStride, in ByteAddressBuffer BiasVector, in uint BiasOffset, in uint BiasInterpretation); + +void [[min_sm=6.9]] __builtin_OuterProductAccumulate(in LinAlg InputVector1, in LinAlg InputVector2, in RWByteAddressBuffer MatrixBuffer, in uint MatrixOffset, in uint MatrixInterpretation, in uint MatrixLayout, in uint MatrixStride); + +void [[min_sm=6.9]] __builtin_VectorAccumulate(in LinAlg InputVector, in RWByteAddressBuffer MatrixBuffer, in uint MatrixOffset); + } namespace @@ -1126,7 +1131,7 @@ namespace DxHitObjectMethods { uint [[rn,class_prefix,min_sm=6.9]] GetPrimitiveIndex(); uint [[rn,class_prefix,min_sm=6.9]] GetHitKind(); uint [[rn,class_prefix,min_sm=6.9]] GetShaderTableIndex(); - $funcT [[class_prefix,min_sm=6.9]] GetAttributes(); + void [[class_prefix,min_sm=6.9]] GetAttributes(out 
udt Attributes); void [[class_prefix,min_sm=6.9]] SetShaderTableIndex(in uint RecordIndex); uint [[ro,class_prefix,min_sm=6.9]] LoadLocalRootTableConstant(in uint RootConstantOffsetInBytes); } namespace diff --git a/utils/hct/hctdb.py b/utils/hct/hctdb.py index 6344fb5849..2b94b13134 100644 --- a/utils/hct/hctdb.py +++ b/utils/hct/hctdb.py @@ -1,7 +1,5 @@ # Copyright (C) Microsoft Corporation. All rights reserved. # This file is distributed under the University of Illinois Open Source License. See LICENSE.TXT for details. -# Modifications Copyright(C) 2025 Advanced Micro Devices, Inc. -# All rights reserved. ############################################################################### # DXIL information. # ############################################################################### @@ -873,6 +871,11 @@ def populate_categories_and_models(self): "library", "raygeneration", ) + for i in ( + "MatVecMul,MatVecMulAdd,OuterProductAccumulate,VectorAccumulate" + ).split(","): + self.name_idx[i].category = "Linear Algebra Operations" + self.name_idx[i].shader_model = 6, 9 def populate_llvm_instructions(self): # Add instructions that map to LLVM instructions. @@ -2624,7 +2627,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf<", + "hf", "rn", [ db_dxil_param( @@ -2642,7 +2645,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per stamp", - "hf<", + "hf", "rn", [ db_dxil_param( @@ -2660,7 +2663,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf<", + "hf", "rn", [ db_dxil_param( @@ -2678,7 +2681,7 @@ def UFI(name, **mappings): next_op_idx, "Unary", "computes the rate of change of components per pixel", - "hf<", + "hf", "rn", [ db_dxil_param( @@ -6340,6 +6343,103 @@ def UFI(name, **mappings): ) next_op_idx += 1 + self.add_dxil_op( + "MatVecMul", + next_op_idx, + "MatVecMul", + "Multiplies a MxK dimension matrix and a K sized input vector", + "